1997-08-10 19:17  Philip Blundell  <Philip.Blundell@pobox.com>

	* nss/nss_db/db-XXX.c: Include <db_185.h> not <db.h>.  Somebody
	should update this to use the new db API.
	* nss/nss_db/db-netgrp.c: Likewise.
	* nss/nss_db/db-alias.c: Likewise.
	* db2/Makefile: Makefile for db-2.x in glibc.

1997-08-27 21:20  Ulrich Drepper  <drepper@cygnus.com>

	* csu/Makefile (before-compile): New goal.  Make sure abi-tag.h
	is generated.
	[$(elf)=yes] (asm-CPPFLAGS): Make sure abi-tag.h file can be found.

	* Makeconfig [$(build-omitfp)=yes] (CFLAGS-.o): Add
	-D__USE_STRING_INLINES.
	* string/string.h: Move strnlen optimization after inclusion of
	<bits/string.h>.  Include <bits/string.h> only if __USE_STRING_INLINES
	is defined.
	* sysdeps/generic/memcpy.c: Undef memcpy to allow macro of this name
	in <bits/string.h>.
	* sysdeps/generic/memset.c: Likewise.
	* sysdeps/i386/string.h: i386 optimized string functions.
	* sysdeps/i386/i486string.h: i486+ optimized string functions.

	* Makefile (subdirs): Change db to db2.
	* shlib-versions: Bump libdb version number to 3.
	* include/db.h: Include from db2 directory.
	* include/db_185.h: New file.
	* sysdeps/i386/Makefile [$(subdirs)=db2] (CPPFLAGS): Add macros
	to provide spinlock information for db2.
	* sysdeps/m68k/m68020/Makefile: New file.  Likewise.
	* sysdeps/sparc/Makefile: New file.  Likewise.
	* sysdeps/unix/sysv/linux/Makefile [$(subdirs)=db2] (CPPFLAGS):
	Add -DHAVE_LLSEEK.
	* db2/config.h: Hand-edited config file for db2 in glibc.
	* db2/compat.h: New file from db-2.3.4.
	* db2/db.h: Likewise.
	* db2/db_185.h: Likewise.
	* db2/db_int.h: Likewise.
	* db2/makedb.c: Likewise.
	* db2/btree/bt_close.c: Likewise.
	* db2/btree/bt_compare.c: Likewise.
	* db2/btree/bt_conv.c: Likewise.
	* db2/btree/bt_cursor.c: Likewise.
	* db2/btree/bt_delete.c: Likewise.
	* db2/btree/bt_open.c: Likewise.
	* db2/btree/bt_page.c: Likewise.
	* db2/btree/bt_put.c: Likewise.
	* db2/btree/bt_rec.c: Likewise.
	* db2/btree/bt_recno.c: Likewise.
	* db2/btree/btree_auto.c: Likewise.
	* db2/btree/bt_rsearch.c: Likewise.
	* db2/btree/bt_search.c: Likewise.
	* db2/btree/bt_split.c: Likewise.
	* db2/btree/bt_stat.c: Likewise.
	* db2/btree/btree.src: Likewise.
	* db2/common/db_appinit.c: Likewise.
	* db2/common/db_err.c: Likewise.
	* db2/common/db_byteorder.c: Likewise.
	* db2/common/db_apprec.c: Likewise.
	* db2/common/db_salloc.c: Likewise.
	* db2/common/db_log2.c: Likewise.
	* db2/common/db_region.c: Likewise.
	* db2/common/db_shash.c: Likewise.
	* db2/db/db.c: Likewise.
	* db2/db/db.src: Likewise.
	* db2/db/db_conv.c: Likewise.
	* db2/db/db_dispatch.c: Likewise.
	* db2/db/db_dup.c: Likewise.
	* db2/db/db_overflow.c: Likewise.
	* db2/db/db_pr.c: Likewise.
	* db2/db/db_rec.c: Likewise.
	* db2/db/db_ret.c: Likewise.
	* db2/db/db_thread.c: Likewise.
	* db2/db/db_auto.c: Likewise.
	* db2/db185/db185.c: Likewise.
	* db2/db185/db185_int.h: Likewise.
	* db2/dbm/dbm.c: Likewise.
	* db2/hash/hash.c: Likewise.
	* db2/hash/hash.src: Likewise.
	* db2/hash/hash_page.c: Likewise.
	* db2/hash/hash_conv.c: Likewise.
	* db2/hash/hash_debug.c: Likewise.
	* db2/hash/hash_stat.c: Likewise.
	* db2/hash/hash_rec.c: Likewise.
	* db2/hash/hash_dup.c: Likewise.
	* db2/hash/hash_func.c: Likewise.
	* db2/hash/hash_auto.c: Likewise.
	* db2/include/mp.h: Likewise.
	* db2/include/btree.h: Likewise.
	* db2/include/db.h.src: Likewise.
	* db2/include/db_int.h.src: Likewise.
	* db2/include/db_shash.h: Likewise.
	* db2/include/db_swap.h: Likewise.
	* db2/include/db_185.h.src: Likewise.
	* db2/include/txn.h: Likewise.
	* db2/include/db_am.h: Likewise.
	* db2/include/shqueue.h: Likewise.
	* db2/include/hash.h: Likewise.
	* db2/include/db_dispatch.h: Likewise.
	* db2/include/lock.h: Likewise.
	* db2/include/db_page.h: Likewise.
	* db2/include/log.h: Likewise.
	* db2/include/db_auto.h: Likewise.
	* db2/include/btree_auto.h: Likewise.
	* db2/include/hash_auto.h: Likewise.
	* db2/include/log_auto.h: Likewise.
	* db2/include/txn_auto.h: Likewise.
	* db2/include/db_ext.h: Likewise.
	* db2/include/btree_ext.h: Likewise.
	* db2/include/clib_ext.h: Likewise.
	* db2/include/common_ext.h: Likewise.
	* db2/include/hash_ext.h: Likewise.
	* db2/include/lock_ext.h: Likewise.
	* db2/include/log_ext.h: Likewise.
	* db2/include/mp_ext.h: Likewise.
	* db2/include/mutex_ext.h: Likewise.
	* db2/include/os_ext.h: Likewise.
	* db2/include/txn_ext.h: Likewise.
	* db2/include/cxx_int.h: Likewise.
	* db2/include/db_cxx.h: Likewise.
	* db2/include/queue.h: Likewise.
	* db2/lock/lock.c: Likewise.
	* db2/lock/lock_conflict.c: Likewise.
	* db2/lock/lock_util.c: Likewise.
	* db2/lock/lock_deadlock.c: Likewise.
	* db2/log/log.c: Likewise.
	* db2/log/log_get.c: Likewise.
	* db2/log/log.src: Likewise.
	* db2/log/log_compare.c: Likewise.
	* db2/log/log_put.c: Likewise.
	* db2/log/log_rec.c: Likewise.
	* db2/log/log_archive.c: Likewise.
	* db2/log/log_register.c: Likewise.
	* db2/log/log_auto.c: Likewise.
	* db2/log/log_findckp.c: Likewise.
	* db2/mp/mp_bh.c: Likewise.
	* db2/mp/mp_fget.c: Likewise.
	* db2/mp/mp_fopen.c: Likewise.
	* db2/mp/mp_fput.c: Likewise.
	* db2/mp/mp_fset.c: Likewise.
	* db2/mp/mp_open.c: Likewise.
	* db2/mp/mp_region.c: Likewise.
	* db2/mp/mp_pr.c: Likewise.
	* db2/mp/mp_sync.c: Likewise.
	* db2/mutex/68020.gcc: Likewise.
	* db2/mutex/mutex.c: Likewise.
	* db2/mutex/README: Likewise.
	* db2/mutex/x86.gcc: Likewise.
	* db2/mutex/sparc.gcc: Likewise.
	* db2/mutex/uts4.cc.s: Likewise.
	* db2/mutex/alpha.dec: Likewise.
	* db2/mutex/alpha.gcc: Likewise.
	* db2/mutex/parisc.gcc: Likewise.
	* db2/mutex/parisc.hp: Likewise.
	* db2/os/db_os_abs.c: Likewise.
	* db2/os/db_os_dir.c: Likewise.
	* db2/os/db_os_fid.c: Likewise.
	* db2/os/db_os_lseek.c: Likewise.
	* db2/os/db_os_mmap.c: Likewise.
	* db2/os/db_os_open.c: Likewise.
	* db2/os/db_os_rw.c: Likewise.
	* db2/os/db_os_sleep.c: Likewise.
	* db2/os/db_os_stat.c: Likewise.
	* db2/os/db_os_unlink.c: Likewise.
	* db2/txn/txn.c: Likewise.
	* db2/txn/txn.src: Likewise.
	* db2/txn/txn_rec.c: Likewise.
	* db2/txn/txn_auto.c: Likewise.
	* db2/clib/getlong.c: Likewise.
	* db2/progs/db_archive/db_archive.c: Likewise.
	* db2/progs/db_checkpoint/db_checkpoint.c: Likewise.
	* db2/progs/db_deadlock/db_deadlock.c: Likewise.
	* db2/progs/db_dump/db_dump.c: Likewise.
	* db2/progs/db_dump185/db_dump185.c: Likewise.
	* db2/progs/db_load/db_load.c: Likewise.
	* db2/progs/db_printlog/db_printlog.c: Likewise.
	* db2/progs/db_recover/db_recover.c: Likewise.
	* db2/progs/db_stat/db_stat.c: Likewise.

	* libio/stdio.h [__cplusplus] (__STDIO_INLINE): Define as inline.

	* po/de.po, po/sv.po: Update from 2.0.5 translations.

	* sysdeps/unix/sysv/linux/netinet/tcp.h: Pretty print.

	* sunrpc/rpc/xdr.h (XDR): Don't define argument of x_destroy callback
	as const.
	* sunrpc/xdr_mem.c (xdrmem_destroy): Don't define argument as const.
	* sunrpc/xdr_rec.c (xdrrec_destroy): Likewise.
	* sunrpc/xdr_stdio.c (xdrstdio_destroy): Likewise.

1997-08-27 18:47  Ulrich Drepper  <drepper@cygnus.com>

	* sysdeps/unix/sysv/linux/if_index.c: Include <errno.h>.
	Reported by Benjamin Kosnik <bkoz@cygnus.com>.

1997-08-27 02:27  Roland McGrath  <roland@baalperazim.frob.com>

	* abi-tags: New file.
	* csu/Makefile (distribute): Remove abi-tag.h.
	($(objpfx)abi-tag.h): New target.
	* Makefile (distribute): Add abi-tags.
	* sysdeps/unix/sysv/linux/abi-tag.h: File removed.
	* sysdeps/mach/hurd/abi-tag.h: File removed.
	* sysdeps/stub/abi-tag.h: File removed.

1997-08-25  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

	* sysdeps/unix/make-syscalls.sh: Change output so that it
	generates compilation rules only for the currently selected object
	suffixes.

1997-08-25  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

	* sysdeps/m68k/dl-machine.h (RTLD_START): Switch back to previous
	section to avoid confusing the compiler.
	* sysdeps/alpha/dl-machine.h (RTLD_START): Likewise.
	* sysdeps/i386/dl-machine.h (RTLD_START): Likewise.
	* sysdeps/mips/dl-machine.h (RTLD_START): Likewise.
	* sysdeps/mips/mips64/dl-machine.h (RTLD_START): Likewise.
	* sysdeps/sparc/sparc32/dl-machine.h (RTLD_START): Likewise.

	* sysdeps/m68k/dl-machine.h (elf_machine_load_address): Use a GOT
	relocation instead of a constant to avoid text relocation.
	(ELF_MACHINE_BEFORE_RTLD_RELOC): Removed.
	(RTLD_START): Declare global labels as functions and add size
	directive.

1997-08-25 17:01  Ulrich Drepper  <drepper@cygnus.com>

	* sysdeps/i386/bits/select.h: Correct assembler versions to work even
	for descriptors >= 32.

	* stdlib/alloca.h: Don't define alloca to __alloca since if gcc
	is used __alloca is not defined to __builtin_alloca and so might
	not be available.
	Reported by Uwe Ohse <uwe@ohse.de>.

	* sysdeps/unix/sysv/linux/sys/sysmacros.h: Define macros in a special
	way if gcc is not used and so dev_t is an array.
	Reported by Uwe Ohse <uwe@ohse.de>.

1997-08-23  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

	* manual/libc.texinfo: Reorder chapters to match logical order.

1997-08-25 12:22  Ulrich Drepper  <drepper@cygnus.com>

	* sunrpc/rpc/xdr.h: Change name of parameters in prototypes of
	xdr_reference, xdrmem_create, and xdrstdio_create because of clash
	with g++ internal symbols.
	Patch by Sudish Joseph <sj@eng.mindspring.net>.

	* elf/dl-deps.c: Implement handling of DT_FILTER.
This commit is contained in:
Ulrich Drepper 1997-08-27 20:26:10 +00:00
parent 22be878ecb
commit 92f1da4da0
192 changed files with 48405 additions and 485 deletions

258
ChangeLog
View File

@ -1,3 +1,258 @@
1997-08-10 19:17 Philip Blundell <Philip.Blundell@pobox.com>
* nss/nss_db/db-XXX.c: Include <db_185.h> not <db.h>. Somebody
should update this to use the new db API.
* nss/nss_db/db-netgrp.c: Likewise.
* nss/nss_db/db-alias.c: Likewise.
* db2/Makefile: Makefile for db-2.x in glibc.
1997-08-27 21:20 Ulrich Drepper <drepper@cygnus.com>
* csu/Makefile (before-compile): New goal. Make sure abi-tag.h
is generated.
[$(elf)=yes] (asm-CPPFLAGS): Make sure abi-tag.h file can be found.
* Makeconfig [$(build-omitfp)=yes] (CFLAGS-.o): Add
-D__USE_STRING_INLINES.
* string/string.h: Move strnlen optimization after inclusion of
<bits/string.h>. Include <bits/string.h> only if __USE_STRING_INLINES
is defined.
* sysdeps/generic/memcpy.c: Undef memcpy to allow macro of this name
in <bits/string.h>.
* sysdeps/generic/memset.c: Likewise.
* sysdeps/i386/string.h: i386 optimized string functions.
* sysdeps/i386/i486string.h: i486+ optimized string functions.
* Makefile (subdirs): Change db to db2.
* shlib-versions: Bump libdb version number to 3.
* include/db.h: Include from db2 directory.
* include/db_185.h: New file.
* sysdeps/i386/Makefile [$(subdirs)=db2] (CPPFLAGS): Add macros
to provide spinlock information for db2.
* sysdeps/m68k/m68020/Makefile: New file. Likewise.
* sysdeps/sparc/Makefile: New file. Likewise.
* sysdeps/unix/sysv/linux/Makefile [$(subdirs)=db2] (CPPFLAGS):
Add -DHAVE_LLSEEK.
* db2/config.h: Hand-edited config file for db2 in glibc.
* db2/compat.h: New file from db-2.3.4.
* db2/db.h: Likewise.
* db2/db_185.h: Likewise.
* db2/db_int.h: Likewise.
* db2/makedb.c: Likewise.
* db2/btree/bt_close.c: Likewise.
* db2/btree/bt_compare.c: Likewise.
* db2/btree/bt_conv.c: Likewise.
* db2/btree/bt_cursor.c: Likewise.
* db2/btree/bt_delete.c: Likewise.
* db2/btree/bt_open.c: Likewise.
* db2/btree/bt_page.c: Likewise.
* db2/btree/bt_put.c: Likewise.
* db2/btree/bt_rec.c: Likewise.
* db2/btree/bt_recno.c: Likewise.
* db2/btree/btree_auto.c: Likewise.
* db2/btree/bt_rsearch.c: Likewise.
* db2/btree/bt_search.c: Likewise.
* db2/btree/bt_split.c: Likewise.
* db2/btree/bt_stat.c: Likewise.
* db2/btree/btree.src: Likewise.
* db2/common/db_appinit.c: Likewise.
* db2/common/db_err.c: Likewise.
* db2/common/db_byteorder.c: Likewise.
* db2/common/db_apprec.c: Likewise.
* db2/common/db_salloc.c: Likewise.
* db2/common/db_log2.c: Likewise.
* db2/common/db_region.c: Likewise.
* db2/common/db_shash.c: Likewise.
* db2/db/db.c: Likewise.
* db2/db/db.src: Likewise.
* db2/db/db_conv.c: Likewise.
* db2/db/db_dispatch.c: Likewise.
* db2/db/db_dup.c: Likewise.
* db2/db/db_overflow.c: Likewise.
* db2/db/db_pr.c: Likewise.
* db2/db/db_rec.c: Likewise.
* db2/db/db_ret.c: Likewise.
* db2/db/db_thread.c: Likewise.
* db2/db/db_auto.c: Likewise.
* db2/db185/db185.c: Likewise.
* db2/db185/db185_int.h: Likewise.
* db2/dbm/dbm.c: Likewise.
* db2/hash/hash.c: Likewise.
* db2/hash/hash.src: Likewise.
* db2/hash/hash_page.c: Likewise.
* db2/hash/hash_conv.c: Likewise.
* db2/hash/hash_debug.c: Likewise.
* db2/hash/hash_stat.c: Likewise.
* db2/hash/hash_rec.c: Likewise.
* db2/hash/hash_dup.c: Likewise.
* db2/hash/hash_func.c: Likewise.
* db2/hash/hash_auto.c: Likewise.
* db2/include/mp.h: Likewise.
* db2/include/btree.h: Likewise.
* db2/include/db.h.src: Likewise.
* db2/include/db_int.h.src: Likewise.
* db2/include/db_shash.h: Likewise.
* db2/include/db_swap.h: Likewise.
* db2/include/db_185.h.src: Likewise.
* db2/include/txn.h: Likewise.
* db2/include/db_am.h: Likewise.
* db2/include/shqueue.h: Likewise.
* db2/include/hash.h: Likewise.
* db2/include/db_dispatch.h: Likewise.
* db2/include/lock.h: Likewise.
* db2/include/db_page.h: Likewise.
* db2/include/log.h: Likewise.
* db2/include/db_auto.h: Likewise.
* db2/include/btree_auto.h: Likewise.
* db2/include/hash_auto.h: Likewise.
* db2/include/log_auto.h: Likewise.
* db2/include/txn_auto.h: Likewise.
* db2/include/db_ext.h: Likewise.
* db2/include/btree_ext.h: Likewise.
* db2/include/clib_ext.h: Likewise.
* db2/include/common_ext.h: Likewise.
* db2/include/hash_ext.h: Likewise.
* db2/include/lock_ext.h: Likewise.
* db2/include/log_ext.h: Likewise.
* db2/include/mp_ext.h: Likewise.
* db2/include/mutex_ext.h: Likewise.
* db2/include/os_ext.h: Likewise.
* db2/include/txn_ext.h: Likewise.
* db2/include/cxx_int.h: Likewise.
* db2/include/db_cxx.h: Likewise.
* db2/include/queue.h: Likewise.
* db2/lock/lock.c: Likewise.
* db2/lock/lock_conflict.c: Likewise.
* db2/lock/lock_util.c: Likewise.
* db2/lock/lock_deadlock.c: Likewise.
* db2/log/log.c: Likewise.
* db2/log/log_get.c: Likewise.
* db2/log/log.src: Likewise.
* db2/log/log_compare.c: Likewise.
* db2/log/log_put.c: Likewise.
* db2/log/log_rec.c: Likewise.
* db2/log/log_archive.c: Likewise.
* db2/log/log_register.c: Likewise.
* db2/log/log_auto.c: Likewise.
* db2/log/log_findckp.c: Likewise.
* db2/mp/mp_bh.c: Likewise.
* db2/mp/mp_fget.c: Likewise.
* db2/mp/mp_fopen.c: Likewise.
* db2/mp/mp_fput.c: Likewise.
* db2/mp/mp_fset.c: Likewise.
* db2/mp/mp_open.c: Likewise.
* db2/mp/mp_region.c: Likewise.
* db2/mp/mp_pr.c: Likewise.
* db2/mp/mp_sync.c: Likewise.
* db2/mutex/68020.gcc: Likewise.
* db2/mutex/mutex.c: Likewise.
* db2/mutex/README: Likewise.
* db2/mutex/x86.gcc: Likewise.
* db2/mutex/sparc.gcc: Likewise.
* db2/mutex/uts4.cc.s: Likewise.
* db2/mutex/alpha.dec: Likewise.
* db2/mutex/alpha.gcc: Likewise.
* db2/mutex/parisc.gcc: Likewise.
* db2/mutex/parisc.hp: Likewise.
* db2/os/db_os_abs.c: Likewise.
* db2/os/db_os_dir.c: Likewise.
* db2/os/db_os_fid.c: Likewise.
* db2/os/db_os_lseek.c: Likewise.
* db2/os/db_os_mmap.c: Likewise.
* db2/os/db_os_open.c: Likewise.
* db2/os/db_os_rw.c: Likewise.
* db2/os/db_os_sleep.c: Likewise.
* db2/os/db_os_stat.c: Likewise.
* db2/os/db_os_unlink.c: Likewise.
* db2/txn/txn.c: Likewise.
* db2/txn/txn.src: Likewise.
* db2/txn/txn_rec.c: Likewise.
* db2/txn/txn_auto.c: Likewise.
* db2/clib/getlong.c: Likewise.
* db2/progs/db_archive/db_archive.c: Likewise.
* db2/progs/db_checkpoint/db_checkpoint.c: Likewise.
* db2/progs/db_deadlock/db_deadlock.c: Likewise.
* db2/progs/db_dump/db_dump.c: Likewise.
* db2/progs/db_dump185/db_dump185.c: Likewise.
* db2/progs/db_load/db_load.c: Likewise.
* db2/progs/db_printlog/db_printlog.c: Likewise.
* db2/progs/db_recover/db_recover.c: Likewise.
* db2/progs/db_stat/db_stat.c: Likewise.
* libio/stdio.h [__cplusplus] (__STDIO_INLINE): Define as inline.
* po/de.po, po/sv.po: Update from 2.0.5 translations.
* sysdeps/unix/sysv/linux/netinet/tcp.h: Pretty print.
* sunrpc/rpc/xdr.h (XDR): Don't define argument of x_destroy callback
as const.
* sunrpc/xdr_mem.c (xdrmem_destroy): Don't define argument as const.
* sunrpc/xdr_rec.c (xdrrec_destroy): Likewise.
* sunrpc/xdr_stdio.c (xdrstdio_destroy): Likewise.
1997-08-27 18:47 Ulrich Drepper <drepper@cygnus.com>
* sysdeps/unix/sysv/linux/if_index.c: Include <errno.h>.
Reported by Benjamin Kosnik <bkoz@cygnus.com>.
1997-08-27 02:27 Roland McGrath <roland@baalperazim.frob.com>
* abi-tags: New file.
* csu/Makefile (distribute): Remove abi-tag.h.
($(objpfx)abi-tag.h): New target.
* Makefile (distribute): Add abi-tags.
* sysdeps/unix/sysv/linux/abi-tag.h: File removed.
* sysdeps/mach/hurd/abi-tag.h: File removed.
* sysdeps/stub/abi-tag.h: File removed.
1997-08-25 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>
* sysdeps/unix/make-syscalls.sh: Change output so that it
generates compilation rules only for the currently selected object
suffixes.
1997-08-25 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>
* sysdeps/m68k/dl-machine.h (RTLD_START): Switch back to previous
section to avoid confusing the compiler.
* sysdeps/alpha/dl-machine.h (RTLD_START): Likewise.
* sysdeps/i386/dl-machine.h (RTLD_START): Likewise.
* sysdeps/mips/dl-machine.h (RTLD_START): Likewise.
* sysdeps/mips/mips64/dl-machine.h (RTLD_START): Likewise.
* sysdeps/sparc/sparc32/dl-machine.h (RTLD_START): Likewise.
* sysdeps/m68k/dl-machine.h (elf_machine_load_address): Use a GOT
relocation instead of a constant to avoid text relocation.
(ELF_MACHINE_BEFORE_RTLD_RELOC): Removed.
(RTLD_START): Declare global labels as functions and add size
directive.
1997-08-25 17:01 Ulrich Drepper <drepper@cygnus.com>
* sysdeps/i386/bits/select.h: Correct assembler versions to work even
for descriptors >= 32.
* stdlib/alloca.h: Don't define alloca to __alloca since if gcc
is used __alloca is not defined to __builtin_alloca and so might
not be available.
Reported by Uwe Ohse <uwe@ohse.de>.
* sysdeps/unix/sysv/linux/sys/sysmacros.h: Define macros in a special
way if gcc is not used and so dev_t is an array.
Reported by Uwe Ohse <uwe@ohse.de>.
1997-08-23 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>
* manual/libc.texinfo: Reorder chapters to match logical order.
1997-08-25 12:22 Ulrich Drepper <drepper@cygnus.com>
* sunrpc/rpc/xdr.h: Change name of parameters in prototypes of
xdr_reference, xdrmem_create, and xdrstdio_create because of clash
with g++ internal symbols.
Patch by Sudish Joseph <sj@eng.mindspring.net>.
1997-08-24 Miles Bader <miles@gnu.ai.mit.edu>
* string/argz.h: Add missing __END_DECLS.
@ -10,7 +265,7 @@
* csu/Makefile (initfini.s): Disable optimization.
* elf/dl-deps.c: Implement handling of DL_FILTER.
* elf/dl-deps.c: Implement handling of DT_FILTER.
* elf/dl-load.c (_dl_init_paths): Add error check.
@ -1673,7 +1928,6 @@
* sysdeps/mips/dl-machine.h: Remove extra stuff.
1997-07-06 07:18 Geoff Keating <geoffk@ozemail.com.au>
* sysdeps/powerpc/bits/endian.h: Handle multiple endianness.
* stdlib/grouping.h: Suppress gcc warning about testing

128
INSTALL
View File

@ -994,3 +994,131 @@ parts of the library were contributed or worked on by other people.
OF SUCH DAMAGE.
If these license terms cause you a real problem, contact the author.
+
* The `db' library is taken from the db-2.3.4 distribution by Sleepycat
Software, and is covered by the following terms:
/*-
* @(#)LICENSE 10.4 (Sleepycat) 7/24/97
*/
The following are the copyrights and redistribution conditions
that apply to this copy of the DB software. For a license to use,
redistribute or sell DB software under conditions other than those
described here, or to purchase support for this software, please
contact Sleepycat Software at one of the following addresses:
Sleepycat Software db@sleepycat.com
394 E. Riding Dr. +1-508-287-4781
Carlisle, MA 01741
USA
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996, 1997
* Sleepycat Software. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Redistributions in any form must be accompanied by information on
* how to obtain complete source code for the DB software and any
* accompanying software that uses the DB software. The source code
* must either be included in the distribution or be available for
* no more than the cost of distribution plus a nominal fee, and
* must be freely redistributable under reasonable conditions. For
* an executable file, complete source code means the source code
* for all modules it contains. It does not mean source code for
* modules or files that typically accompany the operating system
* on which the executable file runs, e.g., standard library
* modules or system header files.
*
* THIS SOFTWARE IS PROVIDED BY SLEEPYCAT SOFTWARE ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SLEEPYCAT
* SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1995, 1996
* The President and Fellows of Harvard University. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgement:
* This product includes software developed by Harvard University
* and its contributors.
* 4. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY HARVARD AND ITS CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL HARVARD OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

View File

@ -554,7 +554,7 @@ ifeq (yes,$(build-omitfp))
# library with debugging information. The debuggable objects are named foo.og.
object-suffixes += .og
CFLAGS-.og = -g
CFLAGS-.o = -g0 -O99 -fomit-frame-pointer
CFLAGS-.o = -g0 -O99 -fomit-frame-pointer -D__USE_STRING_INLINES
CFLAGS-.os += $(CFLAGS-.o)
libtype.og = lib%_g.a
endif

View File

@ -51,7 +51,7 @@ sysdep-subdirs := $(subst $(\n), ,$(sysdep-subdirs))
endif
# These are the subdirectories containing the library source.
subdirs = csu assert ctype db locale intl catgets math setjmp signal stdlib \
subdirs = csu assert ctype db2 locale intl catgets math setjmp signal stdlib \
stdio-common $(stdio) malloc string wcsmbs time dirent grp pwd \
posix io termios resource misc socket sysvipc gmon gnulib \
wctype manual shadow md5-crypt nss $(sysdep-subdirs) po argp \
@ -304,7 +304,7 @@ distribute := README INSTALL FAQ NOTES NEWS PROJECTS BUGS \
config.h.in config.make.in config-name.in Makefile.in \
autolock.sh rellns-sh munch-tmpl.c munch.awk interp.c \
sysdep.h set-hooks.h libc-symbols.h version.h shlib-versions \
rpm/Makefile rpm/template rpm/rpmrc glibcbug.in
rpm/Makefile rpm/template rpm/rpmrc glibcbug.in abi-tags
distribute := $(strip $(distribute))
generated := $(generated) stubs.h version-info.h

24
abi-tags Normal file
View File

@ -0,0 +1,24 @@
# This file defines the ABI tag value we will use in the ELF note included
# in the startup code to be linked into every program.
# The following lines list regexps matching canonical configurations, and
# the associated ABI tag values. The entire list is processed, with
# earlier entries taking precedence over later entries. So loose patterns
# at the end of the list can give defaults.
# The ABI tag values we use are 32-bit quantities stored in machine byte order.
# Conventionally the high-order byte indicates the OS and the low three
# bytes form a version number associated with a particular ABI version.
# After the configuration regexp, four integers in C syntax appear
# surrounded by any whitespace or punctuation, one for each byte, MSB first.
# Configuration ABI OS ABI version
# ------------- ------ -----------
.*-.*-linux.* 0 2.0.0 # earliest compatible kernel version
.*-.*-gnu-gnu.* 1 0.0.0
# There is no catch-all default here because every supported OS that uses
# ELF must have its own unique ABI tag.

15
config.guess vendored
View File

@ -506,6 +506,7 @@ EOF
ret \$31,(\$26),1
.end main
EOF
LIBC=""
${CC-cc} dummy.s -o dummy 2>/dev/null
if test "$?" = 0 ; then
./dummy
@ -516,10 +517,16 @@ EOF
2)
UNAME_MACHINE="alphaev56"
;;
esac
fi
rm -f dummy.s dummy
echo ${UNAME_MACHINE}-unknown-linux-gnu ; exit 0
esac
objdump --private-headers dummy | \
grep ld.so.1 > /dev/null
if test "$?" = 0 ; then
LIBC="libc1"
fi
fi
rm -f dummy.s dummy
echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ; exit 0
elif test "${UNAME_MACHINE}" = "mips" ; then
cat >dummy.c <<EOF
main(argc, argv)

View File

@ -32,11 +32,12 @@ csu-dummies = $(filter-out $(start-installed-name),crt1.o Mcrt1.o)
extra-objs = start.o gmon-start.o \
$(start-installed-name) g$(start-installed-name) \
$(csu-dummies)
before-compile = $(objpfx)abi-tag.h
omit-deps = $(patsubst %.o,%,$(start-installed-name) g$(start-installed-name) \
$(csu-dummies))
install-lib = $(start-installed-name) g$(start-installed-name) \
$(csu-dummies)
distribute = initfini.c gmon-start.c start.c defs.awk abi-note.S abi-tag.h
distribute = initfini.c gmon-start.c start.c defs.awk abi-note.S
all: # Make this the default target; it will be defined in Rules.
@ -85,6 +86,7 @@ endif
ifeq (yes,$(elf))
extra-objs += abi-note.o
asm-CPPFLAGS += -I$(objpfx).
endif
include ../Rules
@ -121,3 +123,15 @@ $(addprefix $(objpfx),$(filter-out $(start-installed-name),$(csu-dummies))):
cp /dev/null $(@:.o=.c)
$(COMPILE.c) $(@:.o=.c) $(OUTPUT_OPTION)
rm -f $(@:.o=.c)
/ := $$/# bite me.
$(objpfx)abi-tag.h: $(..)abi-tags
rm -f $@.new
sed 's/#.*$//;/^[ ]*$$/d' $< | while read conf tag; do \
test `expr '$(config-machine)-$(config-vendor)-$(config-os)' \
: "$$conf"` != 0 || continue; \
echo "$$tag" | sed > $@.new \
's/[^0-9xXa-fA-F]/ /g;s/ *$//;s/ /,/g;s/^ */#define ABI_TAG /';\
done
if test -r $@.new; then mv -f $@.new $@; \
else echo >&2 'This configuration not matched in $<'; exit 1; fi

90
db2/Makefile Normal file
View File

@ -0,0 +1,90 @@
# Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
# You should have received a copy of the GNU Library General Public
# License along with the GNU C Library; see the file COPYING.LIB. If not,
# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#
# Sub-makefile for libdb.
#
# The code is lifted straight from the db 2.3.4 distribution
# with minimal changes.
#
subdir = db2
subdir-dirs = btree common db db185 dbm hash lock log mp mutex os txn \
progs/db_archive progs/db_checkpoint progs/db_deadlock \
progs/db_dump progs/db_dump185 progs/db_load progs/db_printlog \
progs/db_recover progs/db_stat clib
vpath %.c $(subdir-dirs)
extra-libs := libdb
extra-libs-others := $(extra-libs)
libdb-routines := bt_close bt_compare bt_conv bt_cursor bt_delete \
bt_open bt_page bt_put bt_rec bt_recno bt_rsearch bt_search \
bt_split bt_stat btree_auto db db_appinit db_apprec \
db_auto \
db_byteorder db_conv db_dispatch db_dup db_err db_log2 \
db_os_abs db_os_dir db_os_fid db_os_lseek db_os_mmap \
db_os_open db_os_rw db_os_sleep db_os_stat db_os_unlink \
db_overflow db_pr db_rec db_region db_ret db_salloc \
db_shash db_thread hash hash_auto hash_conv hash_debug \
hash_dup hash_func hash_page hash_rec hash_stat lock \
lock_conflict lock_deadlock lock_util log log_archive \
log_auto log_compare log_findckp log_get log_put log_rec \
log_register mp_bh mp_fget mp_fopen mp_fput mp_fset \
mp_open mp_pr mp_region mp_sync mutex txn txn_auto \
txn_rec dbm db185
others := makedb db_dump185 db_archive db_checkpoint db_deadlock \
db_dump db_load db_recover db_stat
install-bin := makedb db_dump185 db_archive db_checkpoint db_deadlock \
db_dump db_load db_recover db_stat
include ../Rules
CPPFLAGS += -I./include -include ./compat.h
$(objpfx)db_checkpoint: $(objpfx)getlong.o
$(objpfx)db_deadlock: $(objpfx)getlong.o
$(objpfx)db_load: $(objpfx)getlong.o
ifeq ($(build-shared),yes)
$(objpfx)makedb: $(objpfx)libdb.so$(libdb.so-version)
$(objpfx)db_dump185: $(objpfx)libdb.so$(libdb.so-version)
$(objpfx)db_archive: $(objpfx)libdb.so$(libdb.so-version)
$(objpfx)db_checkpoint: $(objpfx)libdb.so$(libdb.so-version)
$(objpfx)db_deadlock: $(objpfx)libdb.so$(libdb.so-version)
$(objpfx)db_dump: $(objpfx)libdb.so$(libdb.so-version)
$(objpfx)db_load: $(objpfx)libdb.so$(libdb.so-version)
$(objpfx)db_recover: $(objpfx)libdb.so$(libdb.so-version)
$(objpfx)db_stat: $(objpfx)libdb.so$(libdb.so-version)
else
$(objpfx)makedb: $(objpfx)libdb.a
$(objpfx)db_dump185: $(objpfx)libdb.a
$(objpfx)db_archive: $(objpfx)libdb.a
$(objpfx)db_checkpoint: $(objpfx)libdb.a
$(objpfx)db_deadlock: $(objpfx)libdb.a
$(objpfx)db_dump: $(objpfx)libdb.a
$(objpfx)db_load: $(objpfx)libdb.a
$(objpfx)db_recover: $(objpfx)libdb.a
$(objpfx)db_stat: $(objpfx)libdb.a
endif
# Depend on libc.so so a DT_NEEDED is generated in the shared objects.
$(objpfx)libdb.so: $(common-objpfx)libc.so

184
db2/btree/bt_close.c Normal file
View File

@ -0,0 +1,184 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_close.c 10.22 (Sleepycat) 8/23/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
static void __bam_upstat __P((DB *dbp));
/*
 * __bam_close --
 *	Tear down the btree-private state hanging off a DB handle.
 *
 *	The handle's local statistics are first pushed out through
 *	__bam_upstat(); then the key and data return buffers, the page
 *	stack (only when bt_sp no longer points at the bt_stack array
 *	inside the BTREE) and the BTREE itself are released, and
 *	dbp->internal is cleared.  Always returns 0.
 *
 * PUBLIC: int __bam_close __P((DB *));
 */
int
__bam_close(dbp)
	DB *dbp;
{
	BTREE *t;

	DEBUG_LWRITE(dbp, NULL, "bam_close", NULL, NULL, 0);

	t = dbp->internal;

	/* Statistics have to go out while the BTREE still exists. */
	__bam_upstat(dbp);

	/* Buffer used to hand keys back to the caller. */
	if (t->bt_rkey.data != NULL) {
		FREE(t->bt_rkey.data, t->bt_rkey.size);
	}

	/* Buffer used to hand data back to the caller. */
	if (t->bt_rdata.data != NULL) {
		FREE(t->bt_rdata.data, t->bt_rdata.ulen);
	}

	/* A stack that isn't the embedded array was allocated separately. */
	if (t->bt_sp != t->bt_stack) {
		FREE(t->bt_sp, (t->bt_esp - t->bt_sp) * sizeof(EPG));
	}

	/* Finally the BTREE itself; leave no stale pointer behind. */
	FREE(t, sizeof(BTREE));
	dbp->internal = NULL;

	return (0);
}
/*
 * __bam_sync --
 *	Write a btree's dirty cached pages back to its file.
 *
 *	argdbp	the caller's handle
 *	flags	checked by __db_syncchk()
 *
 *	Returns 0 on success -- including when the tree is in-memory or
 *	read-only, in which case nothing is flushed -- and a non-zero
 *	error value otherwise.  A DB_INCOMPLETE result from memp_fsync()
 *	is reported as success.
 *
 * PUBLIC: int __bam_sync __P((DB *, int));
 */
int
__bam_sync(argdbp, flags)
	DB *argdbp;
	int flags;
{
	DB *dbp;
	int ret;

	DEBUG_LWRITE(argdbp, NULL, "bam_sync", NULL, NULL, flags);

	/* Reject flag values this method doesn't accept. */
	ret = __db_syncchk(argdbp, flags);
	if (ret != 0)
		return (ret);

	/* A file that could never have been modified needs no flush. */
	if (F_ISSET(argdbp, DB_AM_INMEM | DB_AM_RDONLY))
		return (0);

	GETHANDLE(argdbp, NULL, &dbp, ret);

	/* Push the dirty pages out; an incomplete flush is not an error. */
	ret = memp_fsync(dbp->mpf);
	if (ret == DB_INCOMPLETE)
		ret = 0;

	PUTHANDLE(dbp);
	return (ret);
}
/*
 * __bam_upstat --
 *	Update tree statistics.
 *
 *	Adds the handle's local statistics (t->lstat) into the statistics
 *	kept on the metadata page and marks that page dirty.  This is a
 *	best-effort operation: nothing is returned, and the update is
 *	silently skipped when the tree is in-memory or read-only, or when
 *	the metadata page can't be locked, fetched or logged.
 */
static void
__bam_upstat(dbp)
	DB *dbp;
{
	BTREE *t;
	BTMETA *meta;
	DB_LOCK mlock;
	db_pgno_t pgno;
	int flags;

	/*
	 * We use a no-op log call to log the update of the statistics onto the
	 * metadata page.  The dbp->close() call isn't transaction protected to
	 * start with, and I'm not sure what undoing a statistics update means,
	 * anyway.
	 */
	if (F_ISSET(dbp, DB_AM_INMEM | DB_AM_RDONLY))
		return;

	/* The page number has to be known before it is used to lock. */
	pgno = PGNO_METADATA;

	/* Lock the page. */
	if (__bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &mlock) != 0)
		return;

	/*
	 * Get the page.  Only a page that was actually obtained is handed
	 * back to the pool; on failure `meta' was never set.
	 */
	if (__bam_pget(dbp, (PAGE **)&meta, &pgno, 0) == 0) {
		flags = 0;

		/*
		 * Log the change (when logging is on) and apply it only if
		 * the log call succeeded; otherwise return the page clean.
		 */
		if (!DB_LOGGING(dbp) ||
		    __db_noop_log(dbp->dbenv->lg_info, dbp->txn,
		    &LSN(meta), 0) == 0) {
			/* Update the statistics. */
			t = dbp->internal;
			__bam_add_mstat(&t->lstat, &meta->stat);

			flags = DB_MPOOL_DIRTY;
		}

		(void)memp_fput(dbp->mpf, (PAGE *)meta, flags);
	}

	(void)__BT_LPUT(dbp, mlock);
}

205
db2/btree/bt_compare.c Normal file
View File

@ -0,0 +1,205 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_compare.c 10.3 (Sleepycat) 7/19/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
/*
 * __bam_cmp --
 *	Compare a key to a given record.
 *
 *	dbp	handle whose BTREE supplies the comparison routine
 *	k1	the caller's key
 *	e	page/index pair naming the on-page record
 *
 *	Returns < 0, 0 or > 0 as k1 is less than, equal to or greater than
 *	the record.  Calls abort() if an overflow key can't be read for a
 *	user-supplied comparison routine.
 *
 * PUBLIC: int __bam_cmp __P((DB *, const DBT *, EPG *));
 */
int
__bam_cmp(dbp, k1, e)
	DB *dbp;
	const DBT *k1;
	EPG *e;
{
	BINTERNAL *bi;
	BKEYDATA *bk;
	BOVERFLOW *bo;
	BTREE *t;
	DBT k2;
	PAGE *h;

	t = dbp->internal;

	/*
	 * Returns:
	 *	< 0 if k1 is < record
	 *	= 0 if k1 is = record
	 *	> 0 if k1 is > record
	 *
	 * The left-most key on internal pages, at any level of the tree, is
	 * guaranteed, by the following code, to be less than any user key.
	 * This saves us from having to update the leftmost key on an internal
	 * page when the user inserts a new key in the tree smaller than
	 * anything we've yet seen.
	 */
	h = e->page;
	if (e->indx == 0 &&
	    h->prev_pgno == PGNO_INVALID && TYPE(h) != P_LBTREE)
		return (1);

	/*
	 * Start from a fully zeroed DBT on every path, so that neither the
	 * comparison routine nor __db_goff() is ever handed a structure with
	 * indeterminate fields.
	 */
	memset(&k2, 0, sizeof(k2));

	/*
	 * Locate the record's key.  Keys stored on the page are described
	 * directly by k2; keys stored off-page are noted in bo.  On internal
	 * pages the BOVERFLOW structure lives in the item's data area.
	 */
	bo = NULL;
	if (TYPE(h) == P_LBTREE) {
		bk = GET_BKEYDATA(h, e->indx);
		if (bk->type == B_OVERFLOW)
			bo = (BOVERFLOW *)bk;
		else {
			k2.data = bk->data;
			k2.size = bk->len;
		}
	} else {
		bi = GET_BINTERNAL(h, e->indx);
		if (bi->type == B_OVERFLOW)
			bo = (BOVERFLOW *)(bi->data);
		else {
			k2.data = bi->data;
			k2.size = bi->len;
		}
	}

	/*
	 * XXX
	 * We ignore system errors; the only recoverable one is ENOMEM, and we
	 * don't want to require that comparison routines handle random errors.
	 * We don't want to return a valid comparison, either, so we stop.
	 */
	if (bo != NULL) {
		/*
		 * If using the default comparison routine, use __db_moff(),
		 * which compares the overflow key a page at a time.
		 */
		if (t->bt_compare == __bam_defcmp)
			return (__db_moff(dbp, k1, bo->pgno));

		/*
		 * Otherwise, we need a contiguous record so we can hand it
		 * to the user's routine.
		 */
		if (__db_goff(dbp, &k2, bo->tlen,
		    bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen) != 0)
			abort();
	}
	return ((*t->bt_compare)(k1, &k2));
}
/*
 * __bam_defcmp --
 *	Default comparison routine: byte-wise comparison of two keys,
 *	with the shorter key sorting first when one is a prefix of the
 *	other.
 *
 * PUBLIC: int __bam_defcmp __P((const DBT *, const DBT *));
 */
int
__bam_defcmp(a, b)
	const DBT *a, *b;
{
	u_int8_t *lhs, *rhs;
	size_t i, common;

	/*
	 * Returns:
	 *	< 0 if a is < b
	 *	= 0 if a is = b
	 *	> 0 if a is > b
	 *
	 * XXX
	 * If a size_t doesn't fit into a long, or if the difference between
	 * any two characters doesn't fit into an int, this routine can lose.
	 * What we need is a signed integral type that's guaranteed to be at
	 * least as large as a size_t, and there is no such thing.
	 */

	/* Only the bytes both keys have can be compared one for one. */
	common = a->size;
	if (common > b->size)
		common = b->size;

	lhs = a->data;
	rhs = b->data;
	for (i = 0; i < common; ++i)
		if (lhs[i] != rhs[i])
			return ((long)lhs[i] - (long)rhs[i]);

	/* Identical over the common length: the lengths decide. */
	return ((long)a->size - (long)b->size);
}
/*
 * __bam_defpfx --
 *	Default prefix routine: the number of leading bytes of b needed
 *	to tell it apart from a, i.e. one more than the length of their
 *	common prefix (or a's length when the keys have equal length and
 *	contents).
 *
 * PUBLIC: size_t __bam_defpfx __P((const DBT *, const DBT *));
 */
size_t
__bam_defpfx(a, b)
	const DBT *a, *b;
{
	u_int8_t *lhs, *rhs;
	size_t i, common;

	common = a->size;
	if (common > b->size)
		common = b->size;

	/* The first differing byte, counted from one, is the answer. */
	lhs = a->data;
	rhs = b->data;
	for (i = 0; i < common; ++i)
		if (lhs[i] != rhs[i])
			return (i + 1);

	/*
	 * We know that a->size must be <= b->size, or they wouldn't be
	 * in this order.
	 */
	if (a->size < b->size)
		return (a->size + 1);
	return (a->size);
}

83
db2/btree/bt_conv.c Normal file
View File

@ -0,0 +1,83 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_conv.c 10.3 (Sleepycat) 8/9/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "db_swap.h"
#include "btree.h"
/*
 * __bam_pgin, __bam_pgout --
 *	Convert host-specific page layout to/from the host-independent
 *	format stored on disk.
 *
 *	Both routines do nothing (and return 0) unless the DB_PGINFO
 *	carried in the cookie has needswap set.  The metadata page is
 *	handled by __bam_mswap(); every other page goes to the generic
 *	__db_pgin()/__db_pgout() routine.
 *
 * PUBLIC: int __bam_pgin __P((db_pgno_t, void *, DBT *));
 * PUBLIC: int __bam_pgout __P((db_pgno_t, void *, DBT *));
 */
int
__bam_pgin(pg, pp, cookie)
	db_pgno_t pg;
	void *pp;
	DBT *cookie;
{
	DB_PGINFO *pginfo;

	pginfo = (DB_PGINFO *)cookie->data;
	if (pginfo->needswap) {
		if (pg == PGNO_METADATA)
			return (__bam_mswap(pp));
		return (__db_pgin(pg, pp));
	}
	return (0);
}
/*
 * Page-out counterpart of __bam_pgin: a no-op returning 0 unless the
 * cookie's DB_PGINFO asks for swapping, in which case the metadata page
 * goes through __bam_mswap() and any other page through __db_pgout().
 */
int
__bam_pgout(pg, pp, cookie)
	db_pgno_t pg;
	void *pp;
	DBT *cookie;
{
	DB_PGINFO *pginfo;

	pginfo = (DB_PGINFO *)cookie->data;
	if (pginfo->needswap) {
		if (pg == PGNO_METADATA)
			return (__bam_mswap(pp));
		return (__db_pgout(pg, pp));
	}
	return (0);
}
/*
 * __bam_mswap --
 *	Swap the bytes on the btree metadata page.
 *
 *	Applies SWAP32() ten times in a row to the start of the page and
 *	always returns 0.  (SWAP32 is expected to swap the 32-bit word at
 *	its argument and step the pointer past it -- see db_swap.h.)
 *
 * PUBLIC: int __bam_mswap __P((PAGE *));
 */
int
__bam_mswap(pg)
	PAGE *pg;
{
	u_int8_t *p;
	int remaining;

	/*
	 * The words swapped, in page order:
	 *	lsn.file, lsn.offset, pgno, magic, version,
	 *	pagesize, maxkey, minkey, free, flags
	 */
	p = (u_int8_t *)pg;
	for (remaining = 10; remaining > 0; --remaining) {
		SWAP32(p);
	}

	return (0);
}

1577
db2/btree/bt_cursor.c Normal file

File diff suppressed because it is too large Load Diff

607
db2/btree/bt_delete.c Normal file
View File

@ -0,0 +1,607 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_delete.c 10.18 (Sleepycat) 8/24/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
static int __bam_dpages __P((DB *, BTREE *));
/*
 * __bam_delete --
 *	Delete the items referenced by a key.
 *
 *	argdbp	the caller's handle
 *	txn	transaction passed to GETHANDLE
 *	key	key whose key/data pairs (duplicates included) are removed
 *	flags	checked by __db_delchk()
 *
 *	Returns 0 on success or a non-zero error value.  If the leaf page
 *	ends up empty and isn't the root, the page itself is removed
 *	through __bam_dpage().
 *
 * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, int));
 */
int
__bam_delete(argdbp, txn, key, flags)
	DB *argdbp;
	DB_TXN *txn;
	DBT *key;
	int flags;
{
	BTREE *t;
	DB *dbp;
	PAGE *h;
	db_indx_t cnt, i, indx;
	int dpage, exact, ret, stack;

	DEBUG_LWRITE(argdbp, txn, "bam_delete", key, NULL, flags);

	/* Nothing is held on the page stack yet. */
	stack = 0;

	/* Check for invalid flags. */
	ret = __db_delchk(argdbp, flags, F_ISSET(argdbp, DB_AM_RDONLY));
	if (ret != 0)
		return (ret);

	GETHANDLE(argdbp, txn, &dbp, ret);
	t = dbp->internal;

	/* Search the tree for the key; delete only deletes exact matches. */
	ret = __bam_search(dbp, key, S_DELETE, 1, NULL, &exact);
	if (ret != 0)
		goto err;
	stack = 1;

	h = t->bt_csp->page;
	indx = t->bt_csp->indx;

	/*
	 * Count the key/data pairs to remove: the one found plus every
	 * following pair whose key slot shares the same page offset, i.e.
	 * the duplicates.
	 */
	cnt = 1;
	i = indx;
	for (;;) {
		i += P_INDX;
		if (i >= NUM_ENT(h) || h->inp[i] != h->inp[indx])
			break;
		++cnt;
	}

	/*
	 * Remove them one pair at a time.  When __bam_ca_delete() returns
	 * non-zero the pair is only flagged as deleted and stepped over;
	 * otherwise __bam_ditem() is called twice at the same index
	 * (presumably once for the key and once for the data item).
	 */
	while (cnt > 0) {
		if (__bam_ca_delete(dbp, h->pgno, indx, NULL) != 0) {
			GET_BKEYDATA(h, indx + O_INDX)->deleted = 1;
			indx += P_INDX;
		} else {
			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
				goto err;
			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
				goto err;
		}
		--cnt;
		++t->lstat.bt_deleted;
	}

	/* If we're using record numbers, update internal page record counts. */
	if (F_ISSET(dbp, DB_BT_RECNUM)) {
		ret = __bam_adjust(dbp, t, -1);
		if (ret != 0)
			goto err;
	}

	/*
	 * Decide whether the page has to go while it is still held, then
	 * let go of the stack before deleting the page.
	 */
	dpage = NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT;

	__bam_stkrel(dbp);
	stack = 0;

	if (dpage)
		ret = __bam_dpage(dbp, key);
	else
		ret = 0;

err:	if (stack)
		__bam_stkrel(dbp);
	PUTHANDLE(dbp);
	return (ret);
}
/*
 * __ram_delete --
 *	Delete the record referenced by a record-number key.
 *
 *	Returns 0 on success, DB_NOTFOUND when the search isn't an exact
 *	match, DB_KEYEMPTY when the record is already flagged deleted, or
 *	another non-zero error value.
 *
 *	Without DB_RE_RENUMBER the record is replaced by an empty item
 *	flagged as deleted; with it the record is removed, counts and
 *	cursors are adjusted, and an emptied non-root page is deleted.
 *
 * PUBLIC: int __ram_delete __P((DB *, DB_TXN *, DBT *, int));
 */
int
__ram_delete(argdbp, txn, key, flags)
	DB *argdbp;
	DB_TXN *txn;
	DBT *key;
	int flags;
{
	BKEYDATA bk;
	BTREE *t;
	DB *dbp;
	DBT hdr, data;
	PAGE *h;
	db_indx_t indx;
	db_recno_t recno;
	int exact, ret, stack;

	/* Nothing is held on the page stack yet. */
	stack = 0;

	/* Check for invalid flags. */
	ret = __db_delchk(argdbp, flags, F_ISSET(argdbp, DB_AM_RDONLY));
	if (ret != 0)
		return (ret);

	GETHANDLE(argdbp, txn, &dbp, ret);
	t = dbp->internal;

	/* Check the user's record number and fill in as necessary. */
	ret = __ram_getno(argdbp, key, &recno, 0);
	if (ret != 0)
		goto out;

	/* Search the tree for the key; delete only deletes exact matches. */
	ret = __bam_rsearch(dbp, &recno, S_DELETE, 1, &exact);
	if (ret != 0)
		goto out;
	if (!exact) {
		ret = DB_NOTFOUND;
		goto out;
	}

	h = t->bt_csp->page;
	indx = t->bt_csp->indx;
	stack = 1;

	/* If the record has already been deleted, we couldn't have found it. */
	if (GET_BKEYDATA(h, indx)->deleted) {
		ret = DB_KEYEMPTY;
		goto out;
	}

	if (F_ISSET(dbp, DB_RE_RENUMBER)) {
		/* Renumbering: the record really goes away. */
		ret = __bam_ditem(dbp, h, indx);
		if (ret != 0)
			goto out;
		++t->lstat.bt_deleted;
		if (t->bt_recno != NULL)
			F_SET(t->bt_recno, RECNO_MODIFIED);

		/* Adjust the counts. */
		__bam_adjust(dbp, t, -1);

		/* Adjust the cursors. */
		__ram_ca(dbp, recno, CA_DELETE);

		/*
		 * If the page is now empty, delete it -- we have the whole
		 * tree locked, so there are no preparations to make.  Else,
		 * the pages are released below.
		 */
		if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) {
			stack = 0;
			ret = __bam_dpages(dbp, t);
		}
	} else {
		/*
		 * Not renumbering: swap the record for a zero-length item
		 * carrying the deleted flag, so later records keep their
		 * numbers.
		 */
		ret = __bam_ditem(dbp, h, indx);
		if (ret != 0)
			goto out;

		bk.deleted = 1;
		bk.type = B_KEYDATA;
		bk.len = 0;

		memset(&hdr, 0, sizeof(hdr));
		hdr.data = &bk;
		hdr.size = SSZA(BKEYDATA, data);

		memset(&data, 0, sizeof(data));
		data.data = (char *) "";
		data.size = 0;

		ret = __db_pitem(dbp,
		    h, indx, BKEYDATA_SIZE(0), &hdr, &data);
		if (ret != 0)
			goto out;

		++t->lstat.bt_deleted;
	}

	/* Success and failure leave the same way. */
out:	if (stack)
		__bam_stkrel(dbp);
	PUTHANDLE(dbp);
	return (ret);
}
/*
* __bam_ditem --
* Delete one or more entries from a page.
*
* PUBLIC: int __bam_ditem __P((DB *, PAGE *, u_int32_t));
*/
int
__bam_ditem(dbp, h, indx)
DB *dbp;
PAGE *h;
u_int32_t indx;
{
BINTERNAL *bi;
BKEYDATA *bk;
BOVERFLOW *bo;
u_int32_t nbytes;
int ret;
switch (TYPE(h)) {
case P_IBTREE:
bi = GET_BINTERNAL(h, indx);
switch (bi->type) {
case B_DUPLICATE:
case B_OVERFLOW:
nbytes = BINTERNAL_SIZE(bi->len);
goto offpage;
case B_KEYDATA:
nbytes = BKEYDATA_SIZE(bi->len);
break;
default:
return (__db_pgfmt(dbp, h->pgno));
}
break;
case P_IRECNO:
nbytes = RINTERNAL_SIZE;
break;
case P_LBTREE:
/*
* If it's a duplicate key, discard the index and don't touch
* the actual page item. This works because no data item can
* have an index that matches any other index so even if the
* data item is in an index "slot", it won't match any other
* index.
*/
if (!(indx % 2)) {
if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
return (__bam_adjindx(dbp,
h, indx, indx - P_INDX, 0));
if (indx < (u_int32_t)(NUM_ENT(h) - P_INDX) &&
h->inp[indx] == h->inp[indx + P_INDX])
return (__bam_adjindx(dbp,
h, indx, indx + O_INDX, 0));
}
/* FALLTHROUGH */
case P_LRECNO:
bk = GET_BKEYDATA(h, indx);
switch (bk->type) {
case B_DUPLICATE:
case B_OVERFLOW:
nbytes = BOVERFLOW_SIZE;
offpage: /* Delete duplicate/offpage chains. */
bo = GET_BOVERFLOW(h, indx);
if (bo->type == B_DUPLICATE) {
if ((ret =
__db_ddup(dbp, bo->pgno, __bam_free)) != 0)
return (ret);
} else
if ((ret =
__db_doff(dbp, bo->pgno, __bam_free)) != 0)
return (ret);
break;
case B_KEYDATA:
nbytes = BKEYDATA_SIZE(bk->len);
break;
default:
return (__db_pgfmt(dbp, h->pgno));
}
break;
default:
return (__db_pgfmt(dbp, h->pgno));
}
/* Delete the item. */
if ((ret = __db_ditem(dbp, h, indx, nbytes)) != 0)
return (ret);
/* Mark the page dirty. */
return (memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY));
}
/*
 * __bam_adjindx --
 *	Adjust an index on the page.
 *
 *	dbp		database handle
 *	h		page whose index array is changed
 *	indx		slot being inserted or removed
 *	indx_copy	slot whose value is copied on insert (it is also
 *			passed to the log call)
 *	is_insert	non-zero to insert a slot, zero to remove one
 *
 *	Only the inp[] array and the entry count change; no page item is
 *	touched.  The change is logged first when logging is enabled,
 *	then applied, the page is marked dirty and the cursors on the
 *	page are adjusted.
 *
 *	Returns 0 on success, or the error from the log call or from
 *	marking the page dirty.
 *
 * PUBLIC: int __bam_adjindx __P((DB *, PAGE *, u_int32_t, u_int32_t, int));
 */
int
__bam_adjindx(dbp, h, indx, indx_copy, is_insert)
	DB *dbp;
	PAGE *h;
	u_int32_t indx, indx_copy;
	int is_insert;
{
	db_indx_t copy;
	int ret;

	/* Log the change. */
	if (DB_LOGGING(dbp) &&
	    (ret = __bam_adj_log(dbp->dbenv->lg_info, dbp->txn, &LSN(h),
	    0, dbp->log_fileid, PGNO(h), &LSN(h), indx, indx_copy,
	    (u_int32_t)is_insert)) != 0)
		return (ret);

	if (is_insert) {
		/* Save the value first: the shift may move the source slot. */
		copy = h->inp[indx_copy];
		if (indx != NUM_ENT(h))
			memmove(&h->inp[indx + O_INDX], &h->inp[indx],
			    sizeof(db_indx_t) * (NUM_ENT(h) - indx));
		h->inp[indx] = copy;
		++NUM_ENT(h);
	} else {
		--NUM_ENT(h);
		if (indx != NUM_ENT(h))
			memmove(&h->inp[indx], &h->inp[indx + O_INDX],
			    sizeof(db_indx_t) * (NUM_ENT(h) - indx));
	}

	/* Mark the page dirty. */
	ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);

	/*
	 * Adjust the cursors.  The in-memory index array has already
	 * changed, so this is done even if marking the page dirty failed.
	 */
	__bam_ca_di(dbp, h->pgno, indx, is_insert ? 1 : -1);

	/* Report a failure to mark the page dirty instead of dropping it. */
	return (ret);
}
/*
* __bam_dpage --
* Delete a page from the tree.
*
* Re-finds the page for `key', climbs the tree to the highest page that
* would itself be emptied by the delete, write-locks the chain of pages
* beneath it and passes the resulting stack to __bam_dpages().
*
* Returns the result of __bam_dpages(), or the error of a failing search,
* lock or page request.  If a page in the chain turns out not to be
* removable, the pages are released and `ret' is returned as it stands at
* that point (it is 0 unless a call failed).
*
* PUBLIC: int __bam_dpage __P((DB *, const DBT *));
*/
int
__bam_dpage(dbp, key)
DB *dbp;
const DBT *key;
{
BTREE *t;
DB_LOCK lock;
PAGE *h;
db_pgno_t pgno;
int exact, level, ret;
ret = 0;
t = dbp->internal;
/*
* The locking protocol is that we acquire locks by walking down the
* tree, to avoid the obvious deadlocks.
*
* Call __bam_search to reacquire the empty leaf page, but this time
* get both the leaf page and its parent, locked. Walk back up the
* tree, until we have the top pair of pages that we want to delete.
* Once we have the top page that we want to delete locked, lock the
* underlying pages and check to make sure they're still empty. If
* they are, delete them.
*/
for (level = LEAFLEVEL;; ++level) {
/* Acquire a page and its parent, locked. */
if ((ret =
__bam_search(dbp, key, S_WRPAIR, level, NULL, &exact)) != 0)
return (ret);
/*
* If we reach the root or the page isn't going to be empty
* when we delete one record, quit.
*
* On this exit the pair of pages stays locked; it forms the
* top of the stack used below.
*/
h = t->bt_csp[-1].page;
if (h->pgno == PGNO_ROOT || NUM_ENT(h) != 1)
break;
/* Release the two locked pages. */
(void)memp_fput(dbp->mpf, t->bt_csp[-1].page, 0);
(void)__BT_TLPUT(dbp, t->bt_csp[-1].lock);
(void)memp_fput(dbp->mpf, t->bt_csp[0].page, 0);
(void)__BT_TLPUT(dbp, t->bt_csp[0].lock);
}
/*
* Leave the stack pointer one after the last entry, we may be about
* to push more items on the stack.
*/
++t->bt_csp;
/*
* t->bt_csp[-2].page is the top page, which we're not going to delete,
* and t->bt_csp[-1].page is the first page we are going to delete.
*
* Walk down the chain, acquiring the rest of the pages until we've
* retrieved the leaf page. If we find any pages that aren't going
* to be emptied by the delete, someone else added something while we
* were walking the tree, and we discontinue the delete.
*/
for (h = t->bt_csp[-1].page;;) {
if (ISLEAF(h)) {
/* The leaf must already be empty. */
if (NUM_ENT(h) != 0)
goto release;
break;
} else
/* An internal page must hold exactly the one child. */
if (NUM_ENT(h) != 1)
goto release;
/*
* Get the next page, write lock it and push it onto the stack.
* We know it's index 0, because it can only have one element.
*/
pgno = TYPE(h) == P_IBTREE ?
GET_BINTERNAL(h, 0)->pgno : GET_RINTERNAL(h, 0)->pgno;
if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &lock)) != 0)
goto release;
/*
* NOTE(review): if the page request or the push below fails, `lock'
* (and, for the push, the page) is not on the stack yet, so the
* release path does not appear to give it back -- confirm.
*/
if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0)
goto release;
BT_STK_PUSH(t, h, 0, lock, ret);
if (ret != 0)
goto release;
}
/* Step the stack pointer back onto the last entry before deleting. */
BT_STK_POP(t);
return (__bam_dpages(dbp, t));
release:
/* Discard any locked pages and return. */
BT_STK_POP(t);
__bam_stkrel(dbp);
return (ret);
}
/*
* __bam_dpages --
* Delete a set of locked pages.
*
* Expects the page stack of `t' to hold, from t->bt_sp to t->bt_csp, the
* page that stays in the tree followed by the pages to be removed, with
* t->bt_csp naming the page whose leaf-chain links are rewired.
*
* The page at t->bt_csp is unlinked from the leaf chain first, then the
* entry referring to the subtree is deleted from the top page, the root is
* collapsed if it is left with a single entry, and finally the remaining
* stack pages are freed.
*
* Returns 0 on success.  If one of the early steps fails, all stack pages
* are released and that error is returned.
*/
static int
__bam_dpages(dbp, t)
DB *dbp;
BTREE *t;
{
DBT a, b;
DB_LOCK lock;
EPG *epg;
PAGE *h;
db_pgno_t pgno;
db_recno_t rcnt;
int ret;
rcnt = 0; /* XXX: Shut the compiler up. */
/* Start at the bottom of the stack: the page that stays in the tree. */
epg = t->bt_sp;
/*
* !!!
* There is an interesting deadlock situation here. We have to relink
* the leaf page chain around the leaf page being deleted. Consider
* a cursor walking through the leaf pages, that has the previous page
* read-locked and is waiting on a lock for the page we're deleting.
* It will deadlock here. This is a problem, because if our process is
* selected to resolve the deadlock, we'll leave an empty leaf page
* that we can never again access by walking down the tree. So, before
* we unlink the subtree, we relink the leaf page chain.
*/
if ((ret = __db_relink(dbp, t->bt_csp->page, NULL, 1)) != 0)
goto release;
/*
* We have the entire stack of deletable pages locked. Start from the
* top of the tree and move to the bottom, as it's better to release
* the inner pages as soon as possible.
*/
if ((ret = __bam_ditem(dbp, epg->page, epg->indx)) != 0)
goto release;
/*
* If we deleted the next-to-last item from the root page, the tree
* has collapsed a level. Try and write lock the remaining root + 1
* page and copy it onto the root page. If we can't get the lock,
* that's okay, the tree just stays a level deeper than we'd like.
*
* NOTE(review): despite the sentence above, a failed lock or page
* request here jumps to `release' and returns the error, after the
* entry has already been deleted from the top page -- confirm intent.
* On a failed page request `lock' also does not appear to be released.
*/
h = epg->page;
if (h->pgno == PGNO_ROOT && NUM_ENT(h) == 1) {
/* The single remaining entry names the page to pull up. */
pgno = TYPE(epg->page) == P_IBTREE ?
GET_BINTERNAL(epg->page, 0)->pgno :
GET_RINTERNAL(epg->page, 0)->pgno;
if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &lock)) != 0)
goto release;
if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0)
goto release;
/*
* Log the change: `a' is the whole child page, `b' the root's
* remaining entry.  The result of the log call is not checked.
*
* NOTE(review): b.size is computed with BINTERNAL_SIZE even when
* the root is a P_IRECNO page -- confirm that is intended.
*/
if (DB_LOGGING(dbp)) {
memset(&a, 0, sizeof(a));
a.data = h;
a.size = dbp->pgsize;
memset(&b, 0, sizeof(b));
b.data = P_ENTRY(epg->page, 0);
b.size = BINTERNAL_SIZE(((BINTERNAL *)b.data)->len);
__bam_rsplit_log(dbp->dbenv->lg_info, dbp->txn,
&h->lsn, 0, dbp->log_fileid, h->pgno, &a, &b,
&epg->page->lsn);
}
/*
* Make the switch.
*
* One fixup -- if the tree has record numbers and we're not
* converting to a leaf page, we have to preserve the total
* record count.
*/
if (TYPE(h) == P_IRECNO ||
(TYPE(h) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM)))
rcnt = RE_NREC(epg->page);
/* Overwrite the root with the child, then restore the root's number. */
memcpy(epg->page, h, dbp->pgsize);
epg->page->pgno = PGNO_ROOT;
if (TYPE(h) == P_IRECNO ||
(TYPE(h) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM)))
RE_NREC_SET(epg->page, rcnt);
/* Free the last page in that level of the btree. */
++t->lstat.bt_freed;
(void)__bam_free(dbp, h);
/*
* Adjust the cursors.
*
* NOTE(review): h->pgno is read after h was passed to __bam_free();
* confirm the page is still valid to read at this point.
*/
__bam_ca_move(dbp, t, h->pgno, PGNO_ROOT);
(void)__BT_TLPUT(dbp, lock);
}
/* Release the top page in the subtree. */
(void)memp_fput(dbp->mpf, epg->page, 0);
(void)__BT_TLPUT(dbp, epg->lock);
/*
* Free the rest of the pages.
*
* XXX
* Don't bother checking for errors. We've unlinked the subtree from
* the tree, and there's no possibility of recovery.
*/
for (; ++epg <= t->bt_csp; ++t->lstat.bt_freed) {
/* A page that still has an entry is emptied before it is freed. */
if (NUM_ENT(epg->page) != 0)
(void)__bam_ditem(dbp, epg->page, epg->indx);
(void)__bam_free(dbp, epg->page);
(void)__BT_TLPUT(dbp, epg->lock);
}
return (0);
release:
/* Discard any remaining pages and return. */
for (; epg <= t->bt_csp; ++epg) {
(void)memp_fput(dbp->mpf, epg->page, 0);
(void)__BT_TLPUT(dbp, epg->lock);
}
return (ret);
}

355
db2/btree/bt_open.c Normal file
View File

@ -0,0 +1,355 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_open.c 10.20 (Sleepycat) 8/19/97";
#endif /* not lint */
/*
* Implementation of btree access method for 4.4BSD.
*
* The design here was originally based on that of the btree access method
* used in the Postgres database system at UC Berkeley. This implementation
* is wholly independent of the Postgres code.
*/
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
#include "common_ext.h"
static int __bam_keyalloc __P((BTREE *));
static int __bam_setmeta __P((DB *, BTREE *));
/*
 * __bam_open --
 *	Open a btree: allocate and fill in the BTREE structure, install
 *	the btree methods on the DB handle and check (or create) the
 *	tree's metadata through __bam_setmeta().
 *
 *	dbp	handle being opened
 *	type	access method type, stored in dbp->type
 *	dbinfo	optional user settings; NULL selects the defaults
 *
 *	Returns 0 on success, ENOMEM if allocation fails, EINVAL for an
 *	unacceptable bt_minkey/bt_maxkey, or the error from
 *	__bam_setmeta().  On failure the BTREE is freed again.
 *
 * PUBLIC: int __bam_open __P((DB *, DBTYPE, DB_INFO *));
 */
int
__bam_open(dbp, type, dbinfo)
	DB *dbp;
	DBTYPE type;
	DB_INFO *dbinfo;
{
	BTREE *t;
	int ret;

	/* Allocate the btree internal structure. */
	t = (BTREE *)calloc(1, sizeof(BTREE));
	if (t == NULL)
		return (ENOMEM);

	/* The page stack starts out as the array embedded in the BTREE. */
	t->bt_sp = t->bt_csp = t->bt_stack;
	t->bt_esp = t->bt_stack + sizeof(t->bt_stack) / sizeof(t->bt_stack[0]);

	/* Trees that return record numbers need a key return buffer. */
	if (type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) {
		ret = __bam_keyalloc(t);
		if (ret != 0)
			goto err;
	}

	/*
	 * Intention is to make sure all of the user's selections are okay
	 * here and then use them without checking.
	 */
	if (dbinfo == NULL) {
		t->bt_minkey = DEFMINKEYPAGE;
		t->bt_compare = __bam_defcmp;
		t->bt_prefix = __bam_defpfx;
	} else {
		/* Minimum number of keys per page. */
		if (dbinfo->bt_minkey == 0)
			t->bt_minkey = DEFMINKEYPAGE;
		else if (dbinfo->bt_minkey < 2)
			goto einval;
		else
			t->bt_minkey = dbinfo->bt_minkey;

		/* Maximum number of keys per page. */
		if (dbinfo->bt_maxkey == 0)
			t->bt_maxkey = 0;
		else if (dbinfo->bt_maxkey < 1)
			goto einval;
		else
			t->bt_maxkey = dbinfo->bt_maxkey;

		/*
		 * If no comparison, use default comparison.  If no comparison
		 * and no prefix, use default prefix.  (We can't default the
		 * prefix if the user supplies a comparison routine; shortening
		 * the keys may break their comparison algorithm.)
		 */
		if (dbinfo->bt_compare != NULL)
			t->bt_compare = dbinfo->bt_compare;
		else
			t->bt_compare = __bam_defcmp;

		if (dbinfo->bt_prefix != NULL)
			t->bt_prefix = dbinfo->bt_prefix;
		else if (dbinfo->bt_compare == NULL)
			t->bt_prefix = __bam_defpfx;
		else
			t->bt_prefix = NULL;
	}

	/* Initialize the remaining fields of the DB. */
	dbp->type = type;
	dbp->internal = t;
	dbp->cursor = __bam_cursor;
	dbp->del = __bam_delete;
	dbp->get = __bam_get;
	dbp->put = __bam_put;
	dbp->stat = __bam_stat;
	dbp->sync = __bam_sync;

	/*
	 * The btree data structure requires that at least two key/data pairs
	 * can fit on a page, but other than that there's no fixed requirement.
	 * Translate the minimum number of items into the bytes a key/data pair
	 * can use before being placed on an overflow page.  We calculate for
	 * the worst possible alignment by assuming every item requires the
	 * maximum alignment for padding.
	 *
	 * Recno uses the btree bt_ovflsize value -- it's close enough.
	 */
	t->bt_ovflsize = (dbp->pgsize - P_OVERHEAD) / (t->bt_minkey * P_INDX)
	    - (BKEYDATA_PSIZE(0) + ALIGN(1, 4));

	/* Create a root page if new tree. */
	ret = __bam_setmeta(dbp, t);
	if (ret != 0)
		goto err;

	return (0);

einval:	ret = EINVAL;

err:	if (t != NULL) {
		/* If we allocated room for key/data return, discard it. */
		if (t->bt_rkey.data != NULL)
			free(t->bt_rkey.data);

		FREE(t, sizeof(BTREE));
	}
	return (ret);
}
/*
 * __bam_bdup --
 *	Create a BTREE handle for a threaded DB handle.
 *
 *	Allocates a fresh BTREE for `new', with its own page stack and
 *	(when the original returns record numbers) its own key buffer,
 *	and copies the tree parameters from the BTREE of `orig'.
 *
 *	Returns 0 on success, ENOMEM if the BTREE can't be allocated, or
 *	the error from __bam_keyalloc().
 *
 * PUBLIC: int __bam_bdup __P((DB *, DB *));
 */
int
__bam_bdup(orig, new)
	DB *orig, *new;
{
	BTREE *t, *ot;
	int ret;

	ot = orig->internal;

	t = (BTREE *)calloc(1, sizeof(*t));
	if (t == NULL)
		return (ENOMEM);

	/*
	 * !!!
	 * Ignore the cursor queue, only the first DB has attached cursors.
	 */
	t->bt_sp = t->bt_csp = t->bt_stack;
	t->bt_esp = t->bt_stack + sizeof(t->bt_stack) / sizeof(t->bt_stack[0]);

	/* Same rule as at open time for the key return buffer. */
	if (orig->type == DB_RECNO || F_ISSET(orig, DB_BT_RECNUM)) {
		ret = __bam_keyalloc(t);
		if (ret != 0) {
			FREE(t, sizeof(*t));
			return (ret);
		}
	}

	/* Carry over the settings established when the tree was opened. */
	t->bt_maxkey = ot->bt_maxkey;
	t->bt_minkey = ot->bt_minkey;
	t->bt_compare = ot->bt_compare;
	t->bt_prefix = ot->bt_prefix;
	t->bt_ovflsize = ot->bt_ovflsize;

	/*
	 * !!!
	 * The entire RECNO structure is shared.  If it breaks, the application
	 * was misusing it to start with.
	 */
	t->bt_recno = ot->bt_recno;

	new->internal = t;

	return (0);
}
/*
 * __bam_keyalloc --
 *	Allocate return memory for recno keys.
 *
 *	Stores a buffer of sizeof(db_recno_t) bytes in t->bt_rkey and records
 *	its length in bt_rkey.ulen.  Returns 0 on success, ENOMEM when the
 *	allocation fails (bt_rkey.data is then NULL).
 */
static int
__bam_keyalloc(t)
	BTREE *t;
{
	void *keybuf;

	/*
	 * Every recno key has the same size, so allocate the return buffer
	 * once here rather than checking for space on each return.
	 */
	keybuf = (void *)malloc(sizeof(db_recno_t));
	t->bt_rkey.data = keybuf;
	if (keybuf == NULL)
		return (ENOMEM);

	t->bt_rkey.ulen = sizeof(db_recno_t);
	return (0);
}
/*
 * __bam_setmeta --
 *	Check (and optionally create) a tree.
 *
 *	Write-locks and fetches the metadata page.  If its magic number is
 *	already set, the tree exists: bt_maxkey/bt_minkey are reloaded from
 *	the page and 0 is returned.  Otherwise the metadata page and an empty
 *	root leaf page are initialized, written back dirty, and the file is
 *	synced.
 *
 *	Every exit path releases whatever pages and locks it acquired.
 *	Returns 0 on success, or the first error encountered (DB_INCOMPLETE
 *	from memp_fsync is converted to EINVAL).
 */
static int
__bam_setmeta(dbp, t)
	DB *dbp;
	BTREE *t;
{
	BTMETA *meta;
	PAGE *root;
	DB_LOCK mlock, rlock;
	db_pgno_t pgno;
	int ret, t_ret;

	/* Get, and optionally create the metadata page. */
	pgno = PGNO_METADATA;
	if ((ret =
	    __bam_lget(dbp, 0, PGNO_METADATA, DB_LOCK_WRITE, &mlock)) != 0)
		return (ret);
	if ((ret =
	    __bam_pget(dbp, (PAGE **)&meta, &pgno, DB_MPOOL_CREATE)) != 0) {
		(void)__BT_LPUT(dbp, mlock);
		return (ret);
	}

	/*
	 * If the magic number is set, we're not creating the tree.
	 * Correct any fields that may not be right.  Note, all of the
	 * local flags were set by db_open(3).
	 */
	if (meta->magic != 0) {
		t->bt_maxkey = meta->maxkey;
		t->bt_minkey = meta->minkey;

		(void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
		(void)__BT_LPUT(dbp, mlock);
		return (0);
	}

	/* Initialize the tree structure metadata information. */
	ZERO_LSN(meta->lsn);
	meta->pgno = PGNO_METADATA;
	meta->magic = DB_BTREEMAGIC;
	meta->version = DB_BTREEVERSION;
	meta->pagesize = dbp->pgsize;
	meta->maxkey = t->bt_maxkey;
	meta->minkey = t->bt_minkey;
	meta->free = PGNO_INVALID;
	meta->flags = 0;
	if (dbp->type == DB_RECNO)
		F_SET(meta, BTM_RECNO);
	if (F_ISSET(dbp, DB_AM_DUP))
		F_SET(meta, BTM_DUP);
	if (F_ISSET(dbp, DB_RE_FIXEDLEN))
		F_SET(meta, BTM_FIXEDLEN);
	if (F_ISSET(dbp, DB_BT_RECNUM))
		F_SET(meta, BTM_RECNUM);
	if (F_ISSET(dbp, DB_RE_RENUMBER))
		F_SET(meta, BTM_RENUMBER);
	meta->re_len = 0;
	meta->re_pad = 0;
	memcpy(meta->uid, dbp->lock.fileid, DB_FILE_ID_LEN);

	/*
	 * Create and initialize a root page.  On failure the metadata page
	 * and its lock must be given back; it is returned clean (flags 0)
	 * because the tree was not fully created.
	 *
	 * NOTE(review): the in-cache metadata page keeps the magic number
	 * written above -- confirm whether it should be zeroed again before
	 * release so a later open retries creation.
	 */
	pgno = PGNO_ROOT;
	if ((ret = __bam_lget(dbp, 0, PGNO_ROOT, DB_LOCK_WRITE, &rlock)) != 0) {
		(void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
		(void)__BT_LPUT(dbp, mlock);
		return (ret);
	}
	if ((ret = __bam_pget(dbp, &root, &pgno, DB_MPOOL_CREATE)) != 0) {
		(void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
		(void)__BT_LPUT(dbp, rlock);
		(void)__BT_LPUT(dbp, mlock);
		return (ret);
	}
	P_INIT(root, dbp->pgsize, PGNO_ROOT, PGNO_INVALID,
	    PGNO_INVALID, 1, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE);
	ZERO_LSN(root->lsn);

	/*
	 * Release the metadata and root pages.  Both are always put, even if
	 * the first put fails; the first error is the one reported.
	 */
	ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
	if ((t_ret =
	    memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0 && ret == 0)
		ret = t_ret;

	/*
	 * Flush the metadata and root pages to disk -- since the user can't
	 * transaction protect open, the pages have to exist during recovery.
	 *
	 * XXX
	 * It's not useful to return not-yet-flushed here -- convert it to
	 * an error.
	 */
	if (ret == 0 && (ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
		ret = EINVAL;

	/* Release the locks. */
	(void)__BT_LPUT(dbp, mlock);
	(void)__BT_LPUT(dbp, rlock);

	return (ret);
}

312
db2/btree/bt_page.c Normal file
View File

@ -0,0 +1,312 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_page.c 10.5 (Sleepycat) 8/18/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
/*
 * __bam_new --
 *	Get a new page, preferably from the freelist.
 *
 *	Under a write lock on the metadata page, either unlinks the head of
 *	meta->free or requests a page from the pool with DB_MPOOL_NEW, logs
 *	the allocation when logging is enabled, initializes the page as
 *	`type' and hands it back pinned through *pagepp.  Returns 0 on
 *	success; on error everything acquired here is released and the
 *	error is returned.
 *
 * PUBLIC: int __bam_new __P((DB *, u_int32_t, PAGE **));
 */
int
__bam_new(dbp, type, pagepp)
	DB *dbp;
	u_int32_t type;
	PAGE **pagepp;
{
	BTMETA *meta;
	DB_LOCK mlock;
	PAGE *h;
	db_pgno_t pgno;
	int ret;

	/* Nothing is held yet; the error path keys off these sentinels. */
	mlock = LOCK_INVALID;
	meta = NULL;
	h = NULL;

	/* Lock and fetch the metadata page. */
	pgno = PGNO_METADATA;
	ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &mlock);
	if (ret != 0)
		goto err;
	ret = __bam_pget(dbp, (PAGE **)&meta, &pgno, 0);
	if (ret != 0)
		goto err;

	if (meta->free != PGNO_INVALID) {
		/* Take the first page off the free list. */
		pgno = meta->free;
		ret = __bam_pget(dbp, &h, &pgno, 0);
		if (ret != 0)
			goto err;
		meta->free = h->next_pgno;
	} else {
		/* Free list is empty: request a page with DB_MPOOL_NEW. */
		ret = __bam_pget(dbp, &h, &pgno, DB_MPOOL_NEW);
		if (ret != 0)
			goto err;
		ZERO_LSN(h->lsn);
		h->pgno = pgno;
	}

	/* Log the change. */
	if (DB_LOGGING(dbp)) {
		ret = __bam_pg_alloc_log(dbp->dbenv->lg_info, dbp->txn,
		    &meta->lsn, 0, dbp->log_fileid, &meta->lsn, &h->lsn,
		    h->pgno, (u_int32_t)type, meta->free);
		if (ret != 0)
			goto err;
		LSN(h) = LSN(meta);
	}

	/* The metadata page was modified: write it back and unlock. */
	(void)memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
	(void)__BT_TLPUT(dbp, mlock);

	P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, PGNO_INVALID, 0, type);
	*pagepp = h;
	return (0);

err:	if (h != NULL)
		(void)memp_fput(dbp->mpf, h, 0);
	if (meta != NULL)
		(void)memp_fput(dbp->mpf, meta, 0);
	if (mlock != LOCK_INVALID)
		(void)__BT_TLPUT(dbp, mlock);
	return (ret);
}
/*
 * __bam_free --
 *	Add a page to the head of the freelist.
 *
 *	The caller's reference to `h' is always released by this routine,
 *	on success and on every error path (including a logging failure),
 *	because callers assume we take care of it.  The caller's lock on
 *	the page is NOT released here.
 *
 *	Returns 0 on success, otherwise the first error encountered.
 *
 * PUBLIC: int __bam_free __P((DB *, PAGE *));
 */
int
__bam_free(dbp, h)
	DB *dbp;
	PAGE *h;
{
	BTMETA *meta;
	DBT ldbt;
	DB_LOCK mlock;
	db_pgno_t pgno;
	int is_dirty, ret, t_ret;

	/*
	 * Retrieve the metadata page and insert the page at the head of
	 * the free list.  If either the lock get or page get routines
	 * fail, then we need to put the page with which we were called
	 * back because our caller assumes we take care of it.
	 */
	is_dirty = 0;
	pgno = PGNO_METADATA;
	if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &mlock)) != 0)
		goto err;
	if ((ret = __bam_pget(dbp, (PAGE **)&meta, &pgno, 0)) != 0) {
		(void)__BT_TLPUT(dbp, mlock);
		goto err;
	}

	/* Log the change. */
	if (DB_LOGGING(dbp)) {
		memset(&ldbt, 0, sizeof(ldbt));
		ldbt.data = h;
		ldbt.size = P_OVERHEAD;
		if ((ret = __bam_pg_free_log(dbp->dbenv->lg_info,
		    dbp->txn, &meta->lsn, 0, dbp->log_fileid, h->pgno,
		    &meta->lsn, &ldbt, meta->free)) != 0) {
			(void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
			(void)__BT_TLPUT(dbp, mlock);
			/* Still have to give back the caller's page. */
			goto err;
		}
		LSN(h) = LSN(meta);
	}

	/*
	 * The page should have nothing interesting on it, re-initialize it,
	 * leaving only the page number and the LSN.
	 */
#ifdef DEBUG
	{ db_pgno_t __pgno; DB_LSN __lsn;
		__pgno = h->pgno;
		__lsn = h->lsn;
		memset(h, 0xff, dbp->pgsize);
		h->pgno = __pgno;
		h->lsn = __lsn;
	}
#endif
	P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, meta->free, 0, P_INVALID);

	/* Link the page on the metadata free list. */
	meta->free = h->pgno;

	/* Discard the metadata page; keep the first error seen. */
	ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
	if ((t_ret = __BT_TLPUT(dbp, mlock)) != 0 && ret == 0)
		ret = t_ret;

	/* Discard the caller's page reference. */
	is_dirty = DB_MPOOL_DIRTY;
err:	if ((t_ret = memp_fput(dbp->mpf, h, is_dirty)) != 0 && ret == 0)
		ret = t_ret;

	/*
	 * XXX
	 * We have to unlock the caller's page in the caller!
	 */
	return (ret);
}
#ifdef DEBUG
/*
 * __bam_lt --
 *	Print out the list of currently held locks.
 *
 *	Debugging aid: when locking is enabled on the handle, issues a
 *	DB_LOCK_DUMP request through lock_vec().  Always returns 0.
 */
int
__bam_lt(dbp)
	DB *dbp;
{
	DB_LOCKREQ req;

	/* Nothing to dump unless this handle does locking. */
	if (!F_ISSET(dbp, DB_AM_LOCKING))
		return (0);

	req.op = DB_LOCK_DUMP;
	lock_vec(dbp->dbenv->lk_info, dbp->locker, 0, &req, 1, NULL);
	return (0);
}
#endif
/*
 * __bam_lget --
 *	The standard lock get call.
 *
 *	Does nothing (returns 0) when the handle has locking disabled.
 *	Otherwise locks page `pgno' in `mode' on behalf of the current
 *	transaction, or of the handle's own locker id when there is no
 *	transaction.  With do_couple set, the new lock is acquired and the
 *	lock in *lockp released in a single lock_vec() call; *lockp is then
 *	replaced by the new lock.  Negative lock-manager returns are
 *	converted to EAGAIN.
 *
 * PUBLIC: int __bam_lget __P((DB *, int, db_pgno_t, db_lockmode_t, DB_LOCK *));
 */
int
__bam_lget(dbp, do_couple, pgno, mode, lockp)
	DB *dbp;
	int do_couple;
	db_pgno_t pgno;
	db_lockmode_t mode;
	DB_LOCK *lockp;
{
	DB_LOCKREQ couple[2];
	u_int32_t locker;
	int ret;

	if (!F_ISSET(dbp, DB_AM_LOCKING))
		return (0);

	if (dbp->txn == NULL)
		locker = dbp->locker;
	else
		locker = dbp->txn->txnid;
	dbp->lock.pgno = pgno;

	/*
	 * If the object is not currently locked, acquire the lock and
	 * return, otherwise, lock couple.  If we fail and it's not a system
	 * error, convert to EAGAIN.
	 */
	if (!do_couple) {
		ret = lock_get(dbp->dbenv->lk_info,
		    locker, 0, &dbp->lock_dbt, mode, lockp);
		if (ret < 0)
			return (EAGAIN);
		return (ret);
	}

	couple[0].op = DB_LOCK_GET;
	couple[0].obj = &dbp->lock_dbt;
	couple[0].mode = mode;
	couple[1].op = DB_LOCK_PUT;
	couple[1].lock = *lockp;

	ret = lock_vec(dbp->dbenv->lk_info, locker, 0, couple, 2, NULL);
	if (ret != 0) {
		/* If we fail, discard the lock we held. */
		__bam_lput(dbp, *lockp);

		if (ret < 0)
			return (EAGAIN);
		return (ret);
	}

	*lockp = couple[0].lock;
	return (0);
}
/*
 * __bam_lput --
 *	The standard lock put call.
 *
 *	Function form of the __BT_LPUT macro; returns whatever the macro
 *	evaluates to.
 *
 * PUBLIC: int __bam_lput __P((DB *, DB_LOCK));
 */
int
__bam_lput(dbp, lock)
	DB *dbp;
	DB_LOCK lock;
{
	int ret;

	ret = __BT_LPUT(dbp, lock);
	return (ret);
}
/*
 * __bam_pget --
 *	The standard page get call.
 *
 *	Fetches page *pgnop from the handle's memory pool file into *hp
 *	using `mflags'.  Returns 0 on success; on failure returns the value
 *	of __db_pgerr() for that page number.
 *
 * PUBLIC: int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int));
 */
int
__bam_pget(dbp, hp, pgnop, mflags)
	DB *dbp;
	PAGE **hp;
	db_pgno_t *pgnop;
	int mflags;
{
	if (memp_fget(dbp->mpf, pgnop, mflags, hp) == 0)
		return (0);

	/* Report the failing page number through the common error routine. */
	return (__db_pgerr(dbp, *pgnop));
}

919
db2/btree/bt_put.c Normal file
View File

@ -0,0 +1,919 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_put.c 10.23 (Sleepycat) 8/22/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
/* Forward declarations for the file-local helpers defined below. */
static int __bam_fixed __P((BTREE *, DBT *));
static int __bam_lookup __P((DB *, DBT *, int *));
static int __bam_ndup __P((DB *, PAGE *, u_int32_t));
static int __bam_partial __P((DB *, DBT *, PAGE *, u_int32_t));
/*
* __bam_put --
* Add a new key/data pair or replace an existing pair (btree).
*
* Validates the flags with __db_putchk(), locates the insert position
* with __bam_lookup(), and performs the insert through __bam_iitem().
* When __bam_iitem() reports DB_NEEDSPLIT the page stack is released,
* __bam_split() is called and the whole lookup/insert is retried.
*
* Returns 0 on success, DB_KEYEXIST when flags is DB_NOOVERWRITE and a
* matching, non-deleted item already exists, or the error returned by
* one of the routines called here.
*
* PUBLIC: int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
*/
int
__bam_put(argdbp, txn, key, data, flags)
DB *argdbp;
DB_TXN *txn;
DBT *key, *data;
int flags;
{
BTREE *t;
CURSOR c;
DB *dbp;
PAGE *h;
db_indx_t indx;
int exact, iflags, newkey, replace, ret, stack;
DEBUG_LWRITE(argdbp, txn, "bam_put", key, data, flags);
/* Check flags. */
if ((ret = __db_putchk(argdbp, key, data, flags,
F_ISSET(argdbp, DB_AM_RDONLY), F_ISSET(argdbp, DB_AM_DUP))) != 0)
return (ret);
/*
* NOTE(review): GETHANDLE presumably stores the handle to operate on in
* dbp (and may set ret); every exit below pairs it with PUTHANDLE.
*/
GETHANDLE(argdbp, txn, &dbp, ret);
t = dbp->internal;
retry: /*
* Find the location at which to insert. The call to bt_lookup()
* leaves the returned page pinned.
*/
if ((ret = __bam_lookup(dbp, key, &exact)) != 0) {
PUTHANDLE(dbp);
return (ret);
}
h = t->bt_csp->page;
indx = t->bt_csp->indx;
/* stack != 0 means the page stack must be released at err. */
stack = 1;
/*
* If an identical key is already in the tree, and DB_NOOVERWRITE is
* set, an error is returned. If an identical key is already in the
* tree and DB_NOOVERWRITE is not set, the key is either added (when
* duplicates are permitted) or an error is returned. The exception
* is when the item located is referenced by a cursor and marked for
* deletion, in which case we permit the overwrite and flag the cursor.
*/
replace = 0;
if (exact && flags == DB_NOOVERWRITE) {
if (!GET_BKEYDATA(h, indx + O_INDX)->deleted) {
ret = DB_KEYEXIST;
goto err;
}
replace = 1;
__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
}
/*
* If we're inserting into the first or last page of the tree,
* remember where we did it so we can do fast lookup next time.
*
* XXX
* Does reverse order still work (did it ever!?!?)
*/
t->bt_lpgno =
h->next_pgno == PGNO_INVALID || h->prev_pgno == PGNO_INVALID ?
h->pgno : PGNO_INVALID;
/*
* Select the arguments for __bam_iitem() and do the insert. If the
* key is an exact match, we're either adding a new duplicate at the
* end of the duplicate set, or we're replacing the data item with a
* new data item. If the key isn't an exact match, we're inserting
* a new key/data pair, before the search location.
*/
newkey = dbp->type == DB_BTREE && !exact;
if (exact) {
if (F_ISSET(dbp, DB_AM_DUP)) {
/*
* Make sure that we're not looking at a page of
* duplicates -- if so, move to the last entry on
* that page.
*/
c.page = h;
c.pgno = h->pgno;
c.indx = indx;
c.dpgno = PGNO_INVALID;
c.dindx = 0;
if ((ret =
__bam_ovfl_chk(dbp, &c, indx + O_INDX, 1)) != 0)
goto err;
if (c.dpgno != PGNO_INVALID) {
/*
* XXX
* The __bam_ovfl_chk() routine memp_fput() the
* current page and acquired a new one, but did
* not do anything about the lock we're holding.
*/
t->bt_csp->page = h = c.page;
indx = c.dindx;
}
iflags = DB_AFTER;
} else
iflags = DB_CURRENT;
} else
iflags = DB_BEFORE;
/*
* The pages we're using may be modified by __bam_iitem(), so make
* sure we reset the stack.
*/
ret = __bam_iitem(dbp,
&h, &indx, key, data, iflags, newkey ? BI_NEWKEY : 0);
t->bt_csp->page = h;
t->bt_csp->indx = indx;
switch (ret) {
case 0:
/*
* Done. Clean up the cursor, and, if we're doing record
* numbers, adjust the internal page counts.
*/
if (replace)
__bam_ca_replace(dbp, h->pgno, indx, REPLACE_SUCCESS);
if (!replace && F_ISSET(dbp, DB_BT_RECNUM))
ret = __bam_adjust(dbp, t, 1);
break;
case DB_NEEDSPLIT:
/*
* We have to split the page. Back out the cursor setup,
* discard the stack of pages, and do the split.
*/
if (replace) {
replace = 0;
__bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
}
(void)__bam_stkrel(dbp);
/* The stack is already released; don't release it again at err. */
stack = 0;
if ((ret = __bam_split(dbp, key)) != 0)
break;
goto retry;
/* NOTREACHED */
default:
if (replace)
__bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
break;
}
/* Common exit: release the page stack if still held, then the handle. */
err: if (stack)
(void)__bam_stkrel(dbp);
PUTHANDLE(dbp);
return (ret);
}
/*
* __bam_lookup --
* Find the right location in the tree for the key.
*
* First tries the page recorded in t->bt_lpgno (the page of the last
* insert): if that page is a btree leaf at the start or end of the
* leaf chain and the key sorts at or beyond its first/last entry, the
* page is entered on the stack directly ("fast" path).  Otherwise, or
* for DB_BT_RECNUM trees, falls back to a full __bam_search().
*
* *exactp is set nonzero on the fast path when the key compared equal;
* on the slow path it is filled in by __bam_search().  Returns 0 or an
* error from BT_STK_ENTER / __bam_search().
*/
static int
__bam_lookup(dbp, key, exactp)
DB *dbp;
DBT *key;
int *exactp;
{
BTREE *t;
DB_LOCK lock;
EPG e;
PAGE *h;
db_indx_t indx;
int cmp, ret;
t = dbp->internal;
/* h == NULL tells the miss path that no page/lock needs releasing. */
h = NULL;
/*
* Record numbers can't be fast-tracked, we have to lock the entire
* tree.
*/
if (F_ISSET(dbp, DB_BT_RECNUM))
goto slow;
/* Check to see if we've been seeing sorted input. */
if (t->bt_lpgno == PGNO_INVALID)
goto slow;
/*
* Retrieve the page on which we did the last insert. It's okay if
* it doesn't exist, or if it's not the page type we expect, it just
* means that the world changed.
*/
if (__bam_lget(dbp, 0, t->bt_lpgno, DB_LOCK_WRITE, &lock))
goto miss;
if (__bam_pget(dbp, &h, &t->bt_lpgno, 0)) {
(void)__BT_LPUT(dbp, lock);
goto miss;
}
if (TYPE(h) != P_LBTREE)
goto miss;
if (NUM_ENT(h) == 0)
goto miss;
/*
* We have to be at the end or beginning of the tree to know that
* we're inserting in a sort order. If that's the case and we're
* in the right order in comparison to the first/last key/data pair,
* we have the right position.
*/
if (h->next_pgno == PGNO_INVALID) {
/* Last leaf: compare against the final key on the page. */
e.page = h;
e.indx = NUM_ENT(h) - P_INDX;
if ((cmp = __bam_cmp(dbp, key, &e)) >= 0) {
/* Strictly greater: the insert position is one pair past the end. */
if (cmp > 0)
e.indx += P_INDX;
goto fast;
}
}
if (h->prev_pgno == PGNO_INVALID) {
/* First leaf: compare against the first key on the page. */
e.page = h;
e.indx = 0;
if ((cmp = __bam_cmp(dbp, key, &e)) <= 0) {
/*
* We're doing a put, so we want to insert as the last
* of any set of duplicates.
*/
if (cmp == 0) {
/* Walk forward while adjacent pairs share the same key offset. */
for (indx = 0;
indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
h->inp[indx] == h->inp[indx + P_INDX];
indx += P_INDX);
e.indx = indx;
}
goto fast;
}
}
goto miss;
/* Set the exact match flag in case we've already inserted this key. */
fast: *exactp = cmp == 0;
/* Enter the entry in the stack. */
BT_STK_CLR(t);
BT_STK_ENTER(t, e.page, e.indx, lock, ret);
/*
* NOTE(review): on failure h and lock are not released here -- confirm
* whether BT_STK_ENTER or the caller takes care of them.
*/
if (ret != 0)
return (ret);
++t->lstat.bt_cache_hit;
return (0);
/* The cached page was unusable: drop it (if held) and search normally. */
miss: ++t->lstat.bt_cache_miss;
if (h != NULL) {
(void)memp_fput(dbp->mpf, h, 0);
(void)__BT_LPUT(dbp, lock);
}
slow: return (__bam_search(dbp, key, S_INSERT, 1, NULL, exactp));
}
/*
 * OVPUT --
 *	Copy an overflow item onto a page.
 *
 *	Puts the BOVERFLOW structure `bo' on page `h' at index `indx' via
 *	__db_pitem().
 *
 * !!!
 * The expansion uses the variables `dbp' and `ret' from the enclosing
 * scope, and executes `return (ret)' in the calling function when
 * __db_pitem() fails -- any cleanup label in the caller is bypassed.
 */
#undef	OVPUT
#define	OVPUT(h, indx, bo) do {						\
	DBT __hdr;							\
	memset(&__hdr, 0, sizeof(__hdr));				\
	__hdr.data = &(bo);						\
	__hdr.size = BOVERFLOW_SIZE;					\
	if ((ret = __db_pitem(dbp,					\
	    (h), (indx), BOVERFLOW_SIZE, &__hdr, NULL)) != 0)		\
		return (ret);						\
} while (0)
/*
 * __bam_iitem --
 *	Insert an item into the tree.
 *
 *	dbp	database handle
 *	hp	in/out: page to insert on; may be changed by the duplicate
 *		code
 *	indxp	in/out: index on the page; updated to the inserted item
 *	key	key DBT (only stored when flags has BI_NEWKEY)
 *	data	data DBT (DB_DBT_PARTIAL and fixed-length padding are
 *		handled here)
 *	op	DB_AFTER, DB_BEFORE or DB_CURRENT
 *	flags	BI_NEWKEY and/or BI_DELETED
 *
 *	Returns 0 on success, DB_NEEDSPLIT when the page has no room (or
 *	exceeds bt_maxkey), or an error from a lower-level routine.  Once
 *	overflow pages have been allocated for a big key/data item, every
 *	failure goes through the err label so those pages are given back.
 *
 * PUBLIC: int __bam_iitem __P((DB *,
 * PUBLIC:    PAGE **, db_indx_t *, DBT *, DBT *, int, int));
 */
int
__bam_iitem(dbp, hp, indxp, key, data, op, flags)
	DB *dbp;
	PAGE **hp;
	db_indx_t *indxp;
	DBT *key, *data;
	int op, flags;
{
	BTREE *t;
	BKEYDATA *bk;
	BOVERFLOW kbo, dbo;
	DBT tdbt;
	PAGE *h;
	db_indx_t indx;
	u_int32_t have_bytes, need_bytes, needed;
	int bigkey, bigdata, dcopy, dupadjust, ret;

	t = dbp->internal;
	h = *hp;
	indx = *indxp;

	dupadjust = 0;
	bk = NULL;			/* XXX: Shut the compiler up. */

	/*
	 * If it's a page of duplicates, call the common code to do the work.
	 *
	 * !!!
	 * Here's where the hp and indxp are important.  The duplicate code
	 * may decide to rework/rearrange the pages and indices we're using,
	 * so the caller must understand that the stack has to change.
	 */
	if (TYPE(h) == P_DUPLICATE) {
		/* Adjust the index for the new item if it's a DB_AFTER op. */
		if (op == DB_AFTER)
			++*indxp;

		/* Remove the current item if it's a DB_CURRENT op. */
		if (op == DB_CURRENT && (ret = __db_ditem(dbp, *hp, *indxp,
		    BKEYDATA_SIZE(GET_BKEYDATA(*hp, *indxp)->len))) != 0)
			return (ret);

		/* Put the new/replacement item onto the page. */
		return (__db_dput(dbp, data, hp, indxp, __bam_new));
	}

	/*
	 * XXX
	 * Handle partial puts.
	 *
	 * This is truly awful from a performance standpoint.  We don't
	 * optimize for partial puts at all, we delete the record and add it
	 * back in, regardless of size or if we're simply overwriting current
	 * data.  The hash access method does this a lot better than we do,
	 * and we're eventually going to have to fix it.
	 */
	if (F_ISSET(data, DB_DBT_PARTIAL)) {
		tdbt = *data;
		if ((ret = __bam_partial(dbp, &tdbt, h, indx)) != 0)
			return (ret);
		data = &tdbt;
	}

	/* If it's a short fixed-length record, fix it up. */
	if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->bt_recno->re_len) {
		tdbt = *data;
		if ((ret = __bam_fixed(t, &tdbt)) != 0)
			return (ret);
		data = &tdbt;
	}

	/*
	 * If the key or data item won't fit on a page, store it in the
	 * overflow pages.
	 *
	 * !!!
	 * From this point on, we have to recover the allocated overflow
	 * pages on error.
	 */
	bigkey = bigdata = 0;
	if (LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize) {
		kbo.deleted = 0;
		kbo.type = B_OVERFLOW;
		kbo.tlen = key->size;
		if ((ret = __db_poff(dbp, key, &kbo.pgno, __bam_new)) != 0)
			goto err;
		bigkey = 1;
	}
	if (data->size > t->bt_ovflsize) {
		dbo.deleted = 0;
		dbo.type = B_OVERFLOW;
		dbo.tlen = data->size;
		if ((ret = __db_poff(dbp, data, &dbo.pgno, __bam_new)) != 0)
			goto err;
		bigdata = 1;
	}

	dcopy = 0;
	needed = 0;
	if (LF_ISSET(BI_NEWKEY)) {
		/* If BI_NEWKEY is set we're adding a new key and data pair. */
		if (bigkey)
			needed += BOVERFLOW_PSIZE;
		else
			needed += BKEYDATA_PSIZE(key->size);
		if (bigdata)
			needed += BOVERFLOW_PSIZE;
		else
			needed += BKEYDATA_PSIZE(data->size);
	} else {
		/*
		 * We're either overwriting the data item of a key/data pair
		 * or we're adding the data item only, i.e. a new duplicate.
		 */
		if (op == DB_CURRENT) {
			bk = GET_BKEYDATA(h,
			    indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
			if (bk->type == B_OVERFLOW)
				have_bytes = BOVERFLOW_PSIZE;
			else
				have_bytes = BKEYDATA_PSIZE(bk->len);
			need_bytes = 0;
		} else {
			have_bytes = 0;
			need_bytes = sizeof(db_indx_t);
		}
		if (bigdata)
			need_bytes += BOVERFLOW_PSIZE;
		else
			need_bytes += BKEYDATA_PSIZE(data->size);

		/*
		 * If we're overwriting a data item, we copy it if it's not a
		 * special record type and it's the same size (including any
		 * alignment) and do a delete/insert otherwise.
		 */
		if (op == DB_CURRENT && !bigdata &&
		    bk->type == B_KEYDATA && have_bytes == need_bytes)
			dcopy = 1;
		if (have_bytes < need_bytes)
			needed += need_bytes - have_bytes;
	}

	/*
	 * If there's not enough room, or the user has put a ceiling on the
	 * number of keys permitted in the page, split the page.
	 *
	 * XXX
	 * The t->bt_maxkey test here may be insufficient -- do we have to
	 * check in the btree split code, so we don't undo it there!?!?
	 */
	if (P_FREESPACE(h) < needed ||
	    (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) {
		ret = DB_NEEDSPLIT;
		goto err;
	}

	/*
	 * The code breaks it up into six cases:
	 *
	 * 1. Append a new key/data pair.
	 * 2. Insert a new key/data pair.
	 * 3. Copy the data item.
	 * 4. Delete/insert the data item.
	 * 5. Append a new data item.
	 * 6. Insert a new data item.
	 */
	if (LF_ISSET(BI_NEWKEY)) {
		switch (op) {
		case DB_AFTER:		/* 1. Append a new key/data pair. */
			indx += 2;
			*indxp += 2;
			break;
		case DB_BEFORE:		/* 2. Insert a new key/data pair. */
			break;
		default:
			abort();
		}

		/*
		 * Add the key.  The overflow reference is put on the page
		 * inline (rather than through OVPUT, which returns directly)
		 * so that a failure reaches the err label and the overflow
		 * pages allocated above are released.
		 */
		if (bigkey) {
			DBT __hdr;
			memset(&__hdr, 0, sizeof(__hdr));
			__hdr.data = &kbo;
			__hdr.size = BOVERFLOW_SIZE;
			if ((ret = __db_pitem(dbp, h, indx,
			    BOVERFLOW_SIZE, &__hdr, NULL)) != 0)
				goto err;
		} else {
			DBT __data;
			memset(&__data, 0, sizeof(__data));
			__data.data = key->data;
			__data.size = key->size;
			if ((ret = __db_pitem(dbp, h, indx,
			    BKEYDATA_SIZE(key->size), NULL, &__data)) != 0)
				goto err;
		}
		++indx;
	} else {
		switch (op) {
		case DB_CURRENT:	/* 3. Copy the data item. */
			/*
			 * If we're not logging and it's possible, overwrite
			 * the current item.
			 *
			 * XXX
			 * We should add a separate logging message so that
			 * we can do this anytime it's possible, including
			 * for partial record puts.
			 */
			if (dcopy && !DB_LOGGING(dbp)) {
				bk->len = data->size;
				memcpy(bk->data, data->data, data->size);
				goto done;
			}
					/* 4. Delete/insert the data item. */
			if (TYPE(h) == P_LBTREE)
				++indx;
			if ((ret = __bam_ditem(dbp, h, indx)) != 0)
				goto err;
			break;
		case DB_AFTER:		/* 5. Append a new data item. */
			if (TYPE(h) == P_LBTREE) {
				/*
				 * Adjust the cursor and copy in the key for
				 * the duplicate.
				 */
				if ((ret = __bam_adjindx(dbp,
				    h, indx + P_INDX, indx, 1)) != 0)
					goto err;

				indx += 3;
				dupadjust = 1;

				*indxp += 2;
			} else {
				++indx;
				__bam_ca_di(dbp, h->pgno, indx, 1);

				*indxp += 1;
			}
			break;
		case DB_BEFORE:		/* 6. Insert a new data item. */
			if (TYPE(h) == P_LBTREE) {
				/*
				 * Adjust the cursor and copy in the key for
				 * the duplicate.
				 */
				if ((ret =
				    __bam_adjindx(dbp, h, indx, indx, 1)) != 0)
					goto err;

				++indx;
				dupadjust = 1;
			} else
				__bam_ca_di(dbp, h->pgno, indx, 1);
			break;
		default:
			abort();
		}
	}

	/* Add the data; as for the key, failures go through err. */
	if (bigdata) {
		DBT __hdr;
		memset(&__hdr, 0, sizeof(__hdr));
		__hdr.data = &dbo;
		__hdr.size = BOVERFLOW_SIZE;
		if ((ret = __db_pitem(dbp, h, indx,
		    BOVERFLOW_SIZE, &__hdr, NULL)) != 0)
			goto err;
	} else {
		BKEYDATA __bk;
		DBT __hdr, __data;

		memset(&__data, 0, sizeof(__data));
		__data.data = data->data;
		__data.size = data->size;

		if (LF_ISSET(BI_DELETED)) {
			/* Build an explicit header so the item is stored deleted. */
			__bk.len = __data.size;
			__bk.deleted = 1;
			__bk.type = B_KEYDATA;
			memset(&__hdr, 0, sizeof(__hdr));
			__hdr.data = &__bk;
			__hdr.size = SSZA(BKEYDATA, data);
			ret = __db_pitem(dbp, h, indx,
			    BKEYDATA_SIZE(__data.size), &__hdr, &__data);
		} else
			ret = __db_pitem(dbp, h, indx,
			    BKEYDATA_SIZE(data->size), NULL, &__data);
		if (ret != 0)
			goto err;
	}

done:	++t->lstat.bt_added;

	ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);

	/*
	 * If the page is at least 50% full, and we added a duplicate, see if
	 * that set of duplicates takes up at least 25% of the space.  If it
	 * does, move it off onto its own page.
	 */
	if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) {
		--indx;
		if ((ret = __bam_ndup(dbp, h, indx)) != 0)
			goto err;
	}

	if (t->bt_recno != NULL)
		F_SET(t->bt_recno, RECNO_MODIFIED);

	/* The err label is only reachable by goto; success skips it. */
	if (0) {
err:		if (bigkey)
			(void)__db_doff(dbp, kbo.pgno, __bam_free);
		if (bigdata)
			(void)__db_doff(dbp, dbo.pgno, __bam_free);
	}
	return (ret);
}
/*
* __bam_ndup --
* Check to see if the duplicate set at indx should have its own page.
* If it should, create it.
*/
static int
__bam_ndup(dbp, h, indx)
DB *dbp;
PAGE *h;
u_int32_t indx;
{
BKEYDATA *bk;
BOVERFLOW bo;
DBT hdr;
PAGE *cp;
db_indx_t cnt, cpindx, first, sz;
int ret;
while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
indx -= P_INDX;
for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) {
if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx])
break;
bk = GET_BKEYDATA(h, indx);
sz += bk->type == B_KEYDATA ?
BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
bk = GET_BKEYDATA(h, indx + O_INDX);
sz += bk->type == B_KEYDATA ?
BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
}
/*
* If this set of duplicates is using more than 25% of the page, move
* them off. The choice of 25% is a WAG, but it has to be small enough
* that we can always split regardless of the presence of duplicates.
*/
if (sz < dbp->pgsize / 4)
return (0);
/* Get a new page. */
if ((ret = __bam_new(dbp, P_DUPLICATE, &cp)) != 0)
return (ret);
/*
* Move this set of duplicates off the page. First points to the first
* key of the first duplicate key/data pair, cnt is the number of pairs
* we're dealing with.
*/
memset(&hdr, 0, sizeof(hdr));
for (indx = first + O_INDX, cpindx = 0;; ++cpindx) {
/* Copy the entry to the new page. */
bk = GET_BKEYDATA(h, indx);
hdr.data = bk;
hdr.size = bk->type == B_KEYDATA ?
BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE;
if ((ret =
__db_pitem(dbp, cp, cpindx, hdr.size, &hdr, NULL)) != 0)
goto err;
/*
* Move cursors referencing the old entry to the new entry.
* Done after the page put because __db_pitem() adjusts
* cursors on the new page, and before the delete because
* __db_ditem adjusts cursors on the old page.
*/
__bam_ca_dup(dbp,
PGNO(h), first, indx - O_INDX, PGNO(cp), cpindx);
/* Delete the data item. */
if ((ret = __db_ditem(dbp, h, indx, hdr.size)) != 0)
goto err;
/* Delete all but the first reference to the key. */
if (--cnt == 0)
break;
if ((ret = __bam_adjindx(dbp, h, indx, first, 0)) != 0)
goto err;
}
/* Put in a new data item that points to the duplicates page. */
bo.deleted = 0;
bo.type = B_DUPLICATE;
bo.pgno = cp->pgno;
bo.tlen = 0;
OVPUT(h, indx, bo);
return (memp_fput(dbp->mpf, cp, DB_MPOOL_DIRTY));
err: (void)__bam_free(dbp, cp);
return (ret);
}
/*
 * __bam_fixed --
 *	Build the real record for a fixed length put.
 *
 *	Copies the caller's data into the tree's record return buffer
 *	(t->bt_rdata), pads it with re_pad up to re_len, and rewrites *dbt
 *	to reference that buffer.
 *
 *	Returns 0 on success, EINVAL if the record is longer than re_len,
 *	or ENOMEM if the buffer cannot be grown (the existing buffer is
 *	left intact in that case).
 */
static int
__bam_fixed(t, dbt)
	BTREE *t;
	DBT *dbt;
{
	RECNO *rp;
	void *newbuf, *src;
	int aliased;

	rp = t->bt_recno;

	/*
	 * If using fixed-length records, and the record is long, return
	 * EINVAL.  If it's short, pad it out.  Use the record data return
	 * memory, it's only short-term.
	 */
	if (dbt->size > rp->re_len)
		return (EINVAL);

	/*
	 * The caller's DBT may already reference our return buffer (this
	 * happens when __bam_partial() built the record just before us).
	 * Note that before growing the buffer, as realloc may move it.
	 */
	src = dbt->data;
	aliased = t->bt_rdata.data != NULL && src == t->bt_rdata.data;

	if (t->bt_rdata.ulen < rp->re_len) {
		/* Use a temporary so the old buffer isn't lost on failure. */
		newbuf = t->bt_rdata.data == NULL ?
		    (void *)malloc(rp->re_len) :
		    (void *)realloc(t->bt_rdata.data, rp->re_len);
		if (newbuf == NULL)
			return (ENOMEM);
		t->bt_rdata.data = newbuf;
		t->bt_rdata.ulen = rp->re_len;

		/* realloc preserved the contents; follow the buffer. */
		if (aliased)
			src = newbuf;
	}

	/* Source and destination may be the same buffer: use memmove(). */
	memmove(t->bt_rdata.data, src, dbt->size);
	memset((u_int8_t *)t->bt_rdata.data + dbt->size,
	    rp->re_pad, rp->re_len - dbt->size);

	/* Set the DBT to reference our new record. */
	t->bt_rdata.size = rp->re_len;
	t->bt_rdata.dlen = 0;
	t->bt_rdata.doff = 0;
	t->bt_rdata.flags = 0;
	*dbt = t->bt_rdata;
	return (0);
}
/*
* __bam_partial --
* Build the real record for a partial put.
*/
static int
__bam_partial(dbp, dbt, h, indx)
DB *dbp;
DBT *dbt;
PAGE *h;
u_int32_t indx;
{
BTREE *t;
BKEYDATA *bk, tbk;
BOVERFLOW *bo;
DBT copy;
u_int32_t len, nbytes, tlen;
int ret;
u_int8_t *p;
bo = NULL; /* XXX: Shut the compiler up. */
t = dbp->internal;
/*
* Figure out how much total space we'll need. Worst case is where
* the record is 0 bytes long, in which case doff causes the record
* to extend, and the put data is appended to it.
*/
if (indx < NUM_ENT(h)) {
bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
if (bk->type == B_OVERFLOW) {
bo = (BOVERFLOW *)bk;
nbytes = bo->tlen;
} else
nbytes = bk->len;
} else {
bk = &tbk;
bk->type = B_KEYDATA;
nbytes = bk->len = 0;
}
nbytes += dbt->doff + dbt->size + dbt->dlen;
/* Allocate the space. */
if (t->bt_rdata.ulen < nbytes) {
t->bt_rdata.data = t->bt_rdata.data == NULL ?
(void *)malloc(nbytes) :
(void *)realloc(t->bt_rdata.data, nbytes);
if (t->bt_rdata.data == NULL) {
t->bt_rdata.ulen = 0;
return (ENOMEM);
}
t->bt_rdata.ulen = nbytes;
}
/* We use nul bytes for extending the record, get it over with. */
memset(t->bt_rdata.data, 0, nbytes);
tlen = 0;
if (bk->type == B_OVERFLOW) {
/* Take up to doff bytes from the record. */
memset(&copy, 0, sizeof(copy));
if ((ret = __db_goff(dbp, &copy, bo->tlen,
bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen)) != 0)
return (ret);
tlen += dbt->doff;
/*
* If the original record was larger than the offset:
* If dlen > size, shift the remaining data down.
* If dlen < size, shift the remaining data up.
* Use memmove(), the regions may overlap.
*/
p = t->bt_rdata.data;
if (bo->tlen > dbt->doff)
if (dbt->dlen > dbt->size) {
tlen += len = bo->tlen -
dbt->doff - (dbt->dlen - dbt->size);
memmove(p + dbt->doff + dbt->size,
p + dbt->doff + dbt->dlen, len);
} else if (dbt->dlen < dbt->size) {
tlen += len = bo->tlen -
dbt->doff - (dbt->size - dbt->dlen);
memmove(p + dbt->doff + dbt->dlen,
p + dbt->doff + dbt->size, len);
} else
tlen += bo->tlen - dbt->doff;
/* Copy in the user's data. */
memcpy((u_int8_t *)t->bt_rdata.data + dbt->doff,
dbt->data, dbt->size);
tlen += dbt->size;
} else {
/* Take up to doff bytes from the record. */
memcpy(t->bt_rdata.data,
bk->data, dbt->doff > bk->len ? bk->len : dbt->doff);
tlen += dbt->doff;
/* Copy in the user's data. */
memcpy((u_int8_t *)t->bt_rdata.data +
dbt->doff, dbt->data, dbt->size);
tlen += dbt->size;
/* Copy in any remaining data. */
len = dbt->doff + dbt->dlen;
if (bk->len > len) {
memcpy((u_int8_t *)t->bt_rdata.data + dbt->doff +
dbt->size, bk->data + len, bk->len - len);
tlen += bk->len - len;
}
}
/* Set the DBT to reference our new record. */
t->bt_rdata.size = tlen;
t->bt_rdata.dlen = 0;
t->bt_rdata.doff = 0;
t->bt_rdata.flags = 0;
*dbt = t->bt_rdata;
return (0);
}

767
db2/btree/bt_rec.c Normal file
View File

@ -0,0 +1,767 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_rec.c 10.11 (Sleepycat) 8/22/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <ctype.h>
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "shqueue.h"
#include "hash.h"
#include "btree.h"
#include "log.h"
#include "db_dispatch.h"
#include "common_ext.h"
/*
 * __bam_pg_alloc_recover --
 *	Recovery function for pg_alloc.  Replays (redo) or rolls back (undo)
 *	a logged page allocation against both the allocated page and the
 *	metadata page.
 *
 * PUBLIC: int __bam_pg_alloc_recover
 * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__bam_pg_alloc_args *argp;
	BTMETA *meta;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	DB *file_dbp, *mdbp;
	db_pgno_t pgno;
	int cmp_n, cmp_p, created, modified, ret;

	REC_PRINT(__bam_pg_alloc_print);
	REC_INTRO(__bam_pg_alloc_read);

	/*
	 * Two pages are involved.
	 *
	 * Allocated page: on redo it is fetched (created if it does not
	 * exist yet), re-initialized and stamped with this record's LSN.  On
	 * undo it is reset to an invalid page chained in front of the current
	 * free list and its previous LSN is restored.
	 *
	 * Metadata page: on redo its LSN and free pointer are advanced; on
	 * undo they are restored so the page is back on the free list.
	 */
	pgno = PGNO_METADATA;
	ret = memp_fget(mpf, &pgno, 0, &meta);
	if (ret != 0) {
		(void)__db_pgerr(file_dbp, pgno);
		goto out;
	}
	ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep);
	if (ret != 0) {
		(void)__db_pgerr(file_dbp, argp->pgno);
		/* Don't leave the metadata page pinned. */
		(void)memp_fput(mpf, meta, 0);
		goto out;
	}

	/*
	 * The allocated page.  A zero LSN means the page was just created by
	 * the fetch above, so it needs fixing in either direction.
	 */
	created = IS_ZERO_LSN(LSN(pagep));
	modified = 0;
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->page_lsn);
	if (redo) {
		if (created || cmp_p == 0) {
			/* Page predates the allocation: apply it. */
			P_INIT(pagep, file_dbp->pgsize,
			    argp->pgno, PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
			pagep->lsn = *lsnp;
			modified = 1;
		}
	} else if (created || cmp_n == 0) {
		/* Page reflects the allocation: take it back. */
		P_INIT(pagep, file_dbp->pgsize,
		    argp->pgno, PGNO_INVALID, meta->free, 0, P_INVALID);
		pagep->lsn = argp->page_lsn;
		modified = 1;
	}
	ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0);
	if (ret != 0) {
		(void)__db_panic(file_dbp);
		(void)memp_fput(mpf, meta, 0);
		goto out;
	}

	/* The metadata page. */
	modified = 0;
	cmp_n = log_compare(lsnp, &LSN(meta));
	cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
	if (redo) {
		if (cmp_p == 0) {
			/* Advance the free list past the allocated page. */
			meta->lsn = *lsnp;
			meta->free = argp->next;
			modified = 1;
		}
	} else if (cmp_n == 0) {
		/* Put the page back at the head of the free list. */
		meta->lsn = argp->meta_lsn;
		meta->free = argp->pgno;
		modified = 1;
	}
	ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0);
	if (ret != 0) {
		(void)__db_panic(file_dbp);
		goto out;
	}

	/* Success: hand back the previous LSN in the caller's LSN slot. */
	*lsnp = argp->prev_lsn;
	ret = 0;

out:	REC_CLOSE;
}
/*
* __bam_pg_free_recover --
* Recovery function for pg_free.
*
* PUBLIC: int __bam_pg_free_recover
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
int
__bam_pg_free_recover(logp, dbtp, lsnp, redo, info)
DB_LOG *logp;
DBT *dbtp;
DB_LSN *lsnp;
int redo;
void *info;
{
__bam_pg_free_args *argp;
BTMETA *meta;
DB *file_dbp, *mdbp;
DB_MPOOLFILE *mpf;
PAGE *pagep;
db_pgno_t pgno;
int cmp_n, cmp_p, modified, ret;
REC_PRINT(__bam_pg_free_print);
REC_INTRO(__bam_pg_free_read);
/*
* Fix up the freed page. If we're redoing the operation we get the
* page and explicitly discard its contents, then update its LSN. If
* we're undoing the operation, we get the page and restore its header.
*/
if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
(void)__db_pgerr(file_dbp, argp->pgno);
goto out;
}
modified = 0;
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &LSN(argp->header.data));
if (cmp_p == 0 && redo) {
/* Need to redo update described. */
P_INIT(pagep, file_dbp->pgsize,
pagep->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
pagep->lsn = *lsnp;
modified = 1;
} else if (cmp_n == 0 && !redo) {
/* Need to undo update described. */
memcpy(pagep, argp->header.data, argp->header.size);
modified = 1;
}
if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
(void)__db_panic(file_dbp);
goto out;
}
/*
* Fix up the metadata page. If we're redoing or undoing the operation
* we get the page and update its LSN and free pointer.
*/
pgno = PGNO_METADATA;
if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
(void)__db_pgerr(file_dbp, pgno);
goto out;
}
modified = 0;
cmp_n = log_compare(lsnp, &LSN(meta));
cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
if (cmp_p == 0 && redo) {
/* Need to redo update described. */
meta->free = argp->pgno;
meta->lsn = *lsnp;
modified = 1;
} else if (cmp_n == 0 && !redo) {
/* Need to undo update described. */
meta->free = argp->next;
meta->lsn = argp->meta_lsn;
modified = 1;
}
if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
(void)__db_panic(file_dbp);
goto out;
}
*lsnp = argp->prev_lsn;
ret = 0;
out: REC_CLOSE;
}
/*
 * __bam_split_recover --
 *	Recovery function for split.
 *
 *	Redo rebuilds the left/right children (and, for a root split, the
 *	new root) from the logged image of the page that was split.  Undo
 *	copies the logged image back over the split page and restores the
 *	children's and the next page's LSNs/links.
 *
 *	Every error path stores a non-zero code in ret before jumping to
 *	the cleanup code; the function exits through REC_CLOSE (defined
 *	elsewhere -- presumably it returns ret; confirm).
 *
 * PUBLIC: int __bam_split_recover
 * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__bam_split_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__bam_split_args *argp;
	DB *file_dbp, *mdbp;
	DB_MPOOLFILE *mpf;
	PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
	db_pgno_t pgno;
	int l_ret, l_update, p_update, r_update, ret, rootsplit, t_ret;

	REC_PRINT(__bam_split_print);

	/* Cleared first so the cleanup code at "out" is safe on any path. */
	mpf = NULL;
	_lp = lp = np = pp = _rp = rp = NULL;

	REC_INTRO(__bam_split_read);

	/*
	 * There are two kinds of splits that we have to recover from.  The
	 * first is a root-page split, where the root page is split from a
	 * leaf page into an internal page and two new leaf pages are created.
	 * The second is where a page is split into two pages, and a new key
	 * is inserted into the parent page.
	 */
	sp = argp->pg.data;
	pgno = PGNO(sp);
	rootsplit = pgno == PGNO_ROOT;

	/*
	 * The children may legitimately not exist yet, so a failed fetch is
	 * not an error here.  The left page's failure code is kept because
	 * the non-root redo case below does treat a missing left page as an
	 * error and needs something to report.
	 */
	if ((l_ret = memp_fget(mpf, &argp->left, 0, &lp)) != 0)
		lp = NULL;
	if (memp_fget(mpf, &argp->right, 0, &rp) != 0)
		rp = NULL;

	if (redo) {
		l_update = r_update = p_update = 0;
		/*
		 * Decide if we need to resplit the page.
		 *
		 * If this is a root split, then the root has to exist, it's
		 * the page we're splitting and it gets modified.  If this is
		 * not a root split, then the left page has to exist, for the
		 * same reason.
		 */
		if (rootsplit) {
			if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
				(void)__db_pgerr(file_dbp, pgno);
				pp = NULL;
				goto out;
			}
			p_update =
			    log_compare(&LSN(pp), &LSN(argp->pg.data)) == 0;
		} else
			if (lp == NULL) {
				(void)__db_pgerr(file_dbp, argp->left);
				/* Report the fetch failure, not a stale ret. */
				ret = l_ret;
				goto out;
			}
		if (lp == NULL || log_compare(&LSN(lp), &argp->llsn) == 0)
			l_update = 1;
		if (rp == NULL || log_compare(&LSN(rp), &argp->rlsn) == 0)
			r_update = 1;
		if (!p_update && !l_update && !r_update)
			goto done;

		/* Allocate and initialize new left/right child pages. */
		if ((_lp = (PAGE *)malloc(file_dbp->pgsize)) == NULL)
			goto nomem;
		if ((_rp = (PAGE *)malloc(file_dbp->pgsize)) == NULL) {
nomem:			errno = ENOMEM;
			__db_err(file_dbp->dbenv, "%s", strerror(errno));
			/* Make sure the failure is reported to the caller. */
			ret = ENOMEM;
			goto out;
		}
		if (rootsplit) {
			P_INIT(_lp, file_dbp->pgsize, argp->left,
			    PGNO_INVALID,
			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
			    LEVEL(sp), TYPE(sp));
			P_INIT(_rp, file_dbp->pgsize, argp->right,
			    ISINTERNAL(sp) ? PGNO_INVALID : argp->left,
			    PGNO_INVALID, LEVEL(sp), TYPE(sp));
		} else {
			P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
			    ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
			    LEVEL(sp), TYPE(sp));
			P_INIT(_rp, file_dbp->pgsize, argp->right,
			    ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
			    ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
			    LEVEL(sp), TYPE(sp));
		}

		/* Split the page. */
		if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
		    (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
		    NUM_ENT(sp))) != 0)
			goto out;

		/* If the left child is wrong, update it. */
		if (lp == NULL && (ret =
		    memp_fget(mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) {
			(void)__db_pgerr(file_dbp, argp->left);
			lp = NULL;
			goto out;
		}
		if (l_update) {
			memcpy(lp, _lp, file_dbp->pgsize);
			lp->lsn = *lsnp;
			if ((ret = memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
				goto fatal;
			lp = NULL;
		}

		/* If the right child is wrong, update it. */
		if (rp == NULL && (ret = memp_fget(mpf,
		    &argp->right, DB_MPOOL_CREATE, &rp)) != 0) {
			(void)__db_pgerr(file_dbp, argp->right);
			rp = NULL;
			goto out;
		}
		if (r_update) {
			memcpy(rp, _rp, file_dbp->pgsize);
			rp->lsn = *lsnp;
			if ((ret = memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
				goto fatal;
			rp = NULL;
		}

		/*
		 * If the parent page is wrong, update it.  This is of interest
		 * only if it was a root split, since root splits create parent
		 * pages.  All other splits modify a parent page, but those are
		 * separately logged and recovered.
		 */
		if (rootsplit && p_update) {
			if (file_dbp->type == DB_BTREE)
				P_INIT(pp, file_dbp->pgsize,
				    PGNO_ROOT, PGNO_INVALID, PGNO_INVALID,
				    _lp->level + 1, P_IBTREE);
			else
				P_INIT(pp, file_dbp->pgsize,
				    PGNO_ROOT, PGNO_INVALID, PGNO_INVALID,
				    _lp->level + 1, P_IRECNO);
			RE_NREC_SET(pp,
			    file_dbp->type == DB_RECNO ||
			    F_ISSET(file_dbp, DB_BT_RECNUM) ?
			    __bam_total(_lp) + __bam_total(_rp) : 0);
			pp->lsn = *lsnp;
			if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
				goto fatal;
			pp = NULL;
		}

		/*
		 * Finally, redo the next-page link if necessary.  This is of
		 * interest only if it wasn't a root split -- inserting a new
		 * page in the tree requires that any following page have its
		 * previous-page pointer updated to our new page.  The next
		 * page had better exist.
		 */
		if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
			if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
				(void)__db_pgerr(file_dbp, argp->npgno);
				np = NULL;
				goto out;
			}
			if (log_compare(&LSN(np), &argp->nlsn) == 0) {
				PREV_PGNO(np) = argp->right;
				np->lsn = *lsnp;
				if ((ret = memp_fput(mpf,
				    np, DB_MPOOL_DIRTY)) != 0)
					goto fatal;
				np = NULL;
			}
		}
	} else {
		/*
		 * If the split page is wrong, replace its contents with the
		 * logged page contents.  The split page had better exist.
		 */
		if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
			(void)__db_pgerr(file_dbp, pgno);
			pp = NULL;
			goto out;
		}
		if (log_compare(lsnp, &LSN(pp)) == 0) {
			memcpy(pp, argp->pg.data, argp->pg.size);
			if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
				goto fatal;
			pp = NULL;
		}

		/*
		 * If it's a root split and the left child ever existed, put
		 * it on the free list.  (If it's not a root split, we just
		 * updated the left page -- it's the same as the split page.)
		 * If the right child ever existed, root split or not, put it
		 * on the free list.
		 */
		if ((rootsplit && lp != NULL) || rp != NULL) {
			if (rootsplit && lp != NULL &&
			    log_compare(lsnp, &LSN(lp)) == 0) {
				lp->lsn = argp->llsn;
				if ((ret =
				    memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
					goto fatal;
				lp = NULL;
			}
			if (rp != NULL &&
			    log_compare(lsnp, &LSN(rp)) == 0) {
				rp->lsn = argp->rlsn;
				if ((ret =
				    memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
					goto fatal;
				rp = NULL;
			}
		}

		/*
		 * Finally, undo the next-page link if necessary.  This is of
		 * interest only if it wasn't a root split -- inserting a new
		 * page in the tree requires that any following page have its
		 * previous-page pointer updated to our new page.  The next
		 * page had better exist.
		 */
		if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
			if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
				(void)__db_pgerr(file_dbp, argp->npgno);
				np = NULL;
				goto out;
			}
			if (log_compare(lsnp, &LSN(np)) == 0) {
				PREV_PGNO(np) = argp->left;
				np->lsn = argp->nlsn;
				/*
				 * Keep the failure code, as the redo path
				 * does, so the fatal path doesn't report
				 * success.
				 */
				if ((ret = memp_fput(mpf,
				    np, DB_MPOOL_DIRTY)) != 0)
					goto fatal;
				np = NULL;
			}
		}
	}

done:	ret = 0;
	*lsnp = argp->prev_lsn;

	/* The fatal label is only reachable by goto; ret is already set. */
	if (0) {
fatal:		(void)__db_panic(file_dbp);
	}
out:	/* Free any pages that weren't dirtied. */
	if (pp != NULL && (t_ret = memp_fput(mpf, pp, 0)) != 0 && ret == 0)
		ret = t_ret;
	if (lp != NULL && (t_ret = memp_fput(mpf, lp, 0)) != 0 && ret == 0)
		ret = t_ret;
	if (np != NULL && (t_ret = memp_fput(mpf, np, 0)) != 0 && ret == 0)
		ret = t_ret;
	if (rp != NULL && (t_ret = memp_fput(mpf, rp, 0)) != 0 && ret == 0)
		ret = t_ret;

	/* Free any allocated space. */
	if (_lp != NULL)
		free(_lp);
	if (_rp != NULL)
		free(_rp);

	REC_CLOSE;
}
/*
 * __bam_rsplit_recover --
 *	Recovery function for a reverse split.
 *
 *	Two pages are fixed in turn: the root page, and the page whose
 *	contents were copied over the root.  Redo copies the logged page
 *	image onto the root; undo re-initializes the root one level higher
 *	and re-inserts the logged root entry.
 *
 * PUBLIC: int __bam_rsplit_recover
 * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__bam_rsplit_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__bam_rsplit_args *argp;
	DB *file_dbp, *mdbp;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	db_pgno_t pgno;
	int cmp_n, cmp_p, modified, ret;

	REC_PRINT(__bam_rsplit_print);
	REC_INTRO(__bam_rsplit_read);

	/* Fix the root page. */
	pgno = PGNO_ROOT;
	if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) != 0) {
		(void)__db_pgerr(file_dbp, pgno);
		pagep = NULL;
		goto out;
	}
	modified = 0;
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->rootlsn);
	if (cmp_p == 0 && redo) {
		/* Need to redo update described. */
		memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
		/* The copied image carries the child's page number. */
		pagep->pgno = PGNO_ROOT;
		pagep->lsn = *lsnp;
		modified = 1;
	} else if (cmp_n == 0 && !redo) {
		/* Need to undo update described. */
		P_INIT(pagep, file_dbp->pgsize, PGNO_ROOT,
		    PGNO_INVALID, PGNO_INVALID, pagep->level + 1, TYPE(pagep));
		if ((ret = __db_pitem(file_dbp, pagep, 0,
		    argp->rootent.size, &argp->rootent, NULL)) != 0) {
			/*
			 * Release the root page before bailing out, as
			 * __bam_adj_recover does on its error path; otherwise
			 * the page stays pinned.
			 */
			(void)memp_fput(mpf, pagep, 0);
			goto out;
		}
		pagep->lsn = argp->rootlsn;
		modified = 1;
	}
	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
		(void)__db_panic(file_dbp);
		goto out;
	}

	/* Fix the page copied over the root page. */
	if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
		(void)__db_pgerr(file_dbp, argp->pgno);
		pagep = NULL;
		goto out;
	}
	modified = 0;
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &LSN(argp->pgdbt.data));
	if (cmp_p == 0 && redo) {
		/* Need to redo update described. */
		pagep->lsn = *lsnp;
		modified = 1;
	} else if (cmp_n == 0 && !redo) {
		/* Need to undo update described. */
		memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
		modified = 1;
	}
	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
		(void)__db_panic(file_dbp);
		goto out;
	}

	ret = 0;
	*lsnp = argp->prev_lsn;

out:	REC_CLOSE;
}
/*
 * __bam_adj_recover --
 *	Recovery function for adj.  Re-applies (redo) or reverses (undo) a
 *	logged index adjustment on a single page via __bam_adjindx.
 *
 * PUBLIC: int __bam_adj_recover
 * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__bam_adj_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__bam_adj_args *argp;
	DB *file_dbp, *mdbp;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	int cmp_n, cmp_p, modified, ret;

	REC_PRINT(__bam_adj_print);
	REC_INTRO(__bam_adj_read);

	ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
	if (ret != 0) {
		(void)__db_pgerr(file_dbp, argp->pgno);
		pagep = NULL;
		goto out;
	}

	modified = 0;
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->lsn);
	if (redo) {
		if (cmp_p == 0) {
			/* Page predates the adjustment: apply it again. */
			ret = __bam_adjindx(file_dbp, pagep,
			    argp->indx, argp->indx_copy, argp->is_insert);
			if (ret != 0) {
				/* Release the page unmodified and give up. */
				(void)memp_fput(mpf, pagep, 0);
				goto out;
			}
			LSN(pagep) = *lsnp;
			modified = 1;
		}
	} else if (cmp_n == 0) {
		/* Page reflects the adjustment: apply the inverse. */
		ret = __bam_adjindx(file_dbp, pagep,
		    argp->indx, argp->indx_copy, !argp->is_insert);
		if (ret != 0) {
			/* Release the page unmodified and give up. */
			(void)memp_fput(mpf, pagep, 0);
			goto out;
		}
		LSN(pagep) = argp->lsn;
		modified = 1;
	}

	/* Only a successful put moves the caller's LSN to the previous one. */
	ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0);
	if (ret == 0)
		*lsnp = argp->prev_lsn;

out:	REC_CLOSE;
}
/*
 * __bam_cadjust_recover --
 *	Recovery function for the adjust of a count change in an internal
 *	page.
 *
 *	Redo adds argp->adjust to the record count of entry argp->indx
 *	(and, when argp->total is set and the page is the root, to the
 *	root's record total).  Undo subtracts the same amount from both.
 *
 * PUBLIC: int __bam_cadjust_recover
 * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__bam_cadjust_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__bam_cadjust_args *argp;
	DB *file_dbp, *mdbp;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	int cmp_n, cmp_p, modified, ret;

	REC_PRINT(__bam_cadjust_print);
	REC_INTRO(__bam_cadjust_read);

	if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
		errno = __db_pgerr(file_dbp, argp->pgno);
		pagep = NULL;
		goto out;
	}

	modified = 0;
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->lsn);
	if (cmp_p == 0 && redo) {
		/* Need to redo update described. */
		if (file_dbp->type == DB_BTREE &&
		    F_ISSET(file_dbp, DB_BT_RECNUM)) {
			GET_BINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
			if (argp->total && PGNO(pagep) == PGNO_ROOT)
				RE_NREC_ADJ(pagep, argp->adjust);
		}
		if (file_dbp->type == DB_RECNO) {
			GET_RINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
			if (argp->total && PGNO(pagep) == PGNO_ROOT)
				RE_NREC_ADJ(pagep, argp->adjust);
		}

		LSN(pagep) = *lsnp;
		modified = 1;
	} else if (cmp_n == 0 && !redo) {
		/* Need to undo update described. */
		if (file_dbp->type == DB_BTREE &&
		    F_ISSET(file_dbp, DB_BT_RECNUM)) {
			GET_BINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
			/*
			 * The root total must move in the same direction as
			 * the entry count: back out the adjustment, exactly
			 * as the recno case below does.
			 */
			if (argp->total && PGNO(pagep) == PGNO_ROOT)
				RE_NREC_ADJ(pagep, -(argp->adjust));
		}
		if (file_dbp->type == DB_RECNO) {
			GET_RINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
			if (argp->total && PGNO(pagep) == PGNO_ROOT)
				RE_NREC_ADJ(pagep, -(argp->adjust));
		}

		LSN(pagep) = argp->lsn;
		modified = 1;
	}

	/* Only a successful put moves the caller's LSN to the previous one. */
	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
		*lsnp = argp->prev_lsn;

out:	REC_CLOSE;
}
/*
 * __bam_cdel_recover --
 *	Recovery function for the intent-to-delete of a cursor record.
 *	Sets (redo) or clears (undo) the deleted flag on the item at
 *	argp->indx + O_INDX.
 *
 * PUBLIC: int __bam_cdel_recover
 * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__bam_cdel_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__bam_cdel_args *argp;
	DB *file_dbp, *mdbp;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	int cmp_n, cmp_p, modified, ret;

	REC_PRINT(__bam_cdel_print);
	REC_INTRO(__bam_cdel_read);

	ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
	if (ret != 0) {
		(void)__db_pgerr(file_dbp, argp->pgno);
		pagep = NULL;
		goto out;
	}

	modified = 0;
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->lsn);
	if (redo) {
		if (cmp_p == 0) {
			/* Page predates the record: mark the item deleted. */
			GET_BKEYDATA(pagep, argp->indx + O_INDX)->deleted = 1;
			LSN(pagep) = *lsnp;
			modified = 1;
		}
	} else if (cmp_n == 0) {
		/* Page reflects the record: clear the mark again. */
		GET_BKEYDATA(pagep, argp->indx + O_INDX)->deleted = 0;
		LSN(pagep) = argp->lsn;
		modified = 1;
	}

	/* Only a successful put moves the caller's LSN to the previous one. */
	ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0);
	if (ret == 0)
		*lsnp = argp->prev_lsn;

out:	REC_CLOSE;
}

1195
db2/btree/bt_recno.c Normal file

File diff suppressed because it is too large Load Diff

347
db2/btree/bt_rsearch.c Normal file
View File

@ -0,0 +1,347 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_rsearch.c 10.8 (Sleepycat) 8/24/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
/*
 * __bam_rsearch --
 *	Search a btree for a record number.
 *
 *	dbp	database handle
 *	recnop	in: 1-based record number to find; out: when S_APPEND is
 *		set in flags, the record number one past the current total
 *	flags	search type (S_* values), optionally OR'd with S_APPEND
 *	stop	page level at which an S_PARENT search stops
 *	exactp	out: 1 if the record number exists in the tree, else 0
 *
 *	Returns DB_NOTFOUND when the record cannot be located for this
 *	kind of search, otherwise the value left in ret by the stack
 *	macros or by the failing lock/page call.
 *
 * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int, int, int *));
 */
int
__bam_rsearch(dbp, recnop, flags, stop, exactp)
	DB *dbp;
	db_recno_t *recnop;
	u_int flags;
	int stop, *exactp;
{
	BINTERNAL *bi;
	BTREE *t;
	DB_LOCK lock;
	PAGE *h;
	RINTERNAL *ri;
	db_indx_t indx, top;
	db_pgno_t pg;
	db_recno_t recno, total;
	int isappend, ret, stack;

	t = dbp->internal;

	/*
	 * We test for groups of flags, S_APPEND is the only one that can be
	 * OR'd into the set.  Clear it now so that the tests for equality
	 * will work.
	 */
	if ((isappend = LF_ISSET(S_APPEND)) != 0)
		LF_CLR(S_APPEND);

	/*
	 * There are several ways we search a btree tree.  The flags argument
	 * specifies if we're acquiring read or write locks and if we are
	 * locking pairs of pages.  See btree.h for more details.
	 *
	 * If write-locking pages, we need to know whether or not to acquire a
	 * write lock on a page before getting it.  This depends on how deep it
	 * is in tree, which we don't know until we acquire the root page.  So,
	 * if we need to lock the root page we may have to upgrade it later,
	 * because we won't get the correct lock initially.
	 *
	 * Retrieve the root page.
	 */
	pg = PGNO_ROOT;
	if ((ret = __bam_lget(dbp, 0, PGNO_ROOT,
	    flags == S_INSERT || flags == S_DELETE ?
	    DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
		return (ret);
	if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
		(void)__BT_LPUT(dbp, lock);
		return (ret);
	}
	total = RE_NREC(h);

	/*
	 * If appending to the tree, set the record number now -- we have the
	 * root page locked.
	 *
	 * Delete only deletes exact matches, read only returns exact matches.
	 * Note, this is different from __bam_search(), which returns non-exact
	 * matches for read.
	 *
	 * The record may not exist.  We can only return the correct location
	 * for the record immediately after the last record in the tree, so do
	 * a fast check now.
	 */
	if (isappend) {
		*exactp = 0;
		*recnop = recno = total + 1;
	} else {
		recno = *recnop;
		if (recno <= total)
			*exactp = 1;
		else {
			*exactp = 0;
			if (flags == S_DELETE ||
			    flags == S_FIND || recno > total + 1) {
				(void)memp_fput(dbp->mpf, h, 0);
				(void)__BT_LPUT(dbp, lock);
				return (DB_NOTFOUND);
			}
		}
	}

	/* Decide if we're building a stack based on the operation. */
	BT_STK_CLR(t);
	stack = flags == S_DELETE || flags == S_INSERT;

	/*
	 * Decide if we need to save this page; if we do, write lock it, and
	 * start to build a stack.
	 */
	if (LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) {
		(void)memp_fput(dbp->mpf, h, 0);
		if ((ret = __bam_lget(dbp, 1, pg, DB_LOCK_WRITE, &lock)) != 0)
			return (ret);
		if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
			(void)__BT_LPUT(dbp, lock);
			return (ret);
		}
		stack = 1;
	}

	/* Records in the tree are 0-based, and record numbers are 1-based. */
	--recno;

	/*
	 * Walk down the tree.  "total" counts the records in the subtrees
	 * skipped so far, so recno - total is the offset within the current
	 * subtree.
	 */
	for (total = 0;;) {
		switch (TYPE(h)) {
		case P_LBTREE:
			BT_STK_ENTER(t, h, (recno - total) * P_INDX, lock, ret);
			return (ret);
		case P_IBTREE:
			for (indx = 0, top = NUM_ENT(h);;) {
				bi = GET_BINTERNAL(h, indx);
				if (++indx == top || total + bi->nrecs > recno)
					break;
				total += bi->nrecs;
			}
			pg = bi->pgno;
			break;
		case P_LRECNO:
			BT_STK_ENTER(t, h, recno - total, lock, ret);
			return (ret);
		case P_IRECNO:
			for (indx = 0, top = NUM_ENT(h);;) {
				ri = GET_RINTERNAL(h, indx);
				if (++indx == top || total + ri->nrecs > recno)
					break;
				total += ri->nrecs;
			}
			pg = ri->pgno;
			break;
		default:
			return (__db_pgfmt(dbp, h->pgno));
		}
		/* The loops above leave indx one past the chosen entry. */
		--indx;

		if (stack) {
			/* Return if this is the lowest page wanted. */
			if (LF_ISSET(S_PARENT) && stop == h->level) {
				BT_STK_ENTER(t, h, indx, lock, ret);
				return (ret);
			}
			BT_STK_PUSH(t, h, indx, lock, ret);
			if (ret)
				goto err;

			if ((ret = __bam_lget(dbp, 0, pg,
			    LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ,
			    &lock)) != 0)
				goto err;
		} else {
			/*
			 * Decide if we want to return a pointer to the next
			 * page in the stack.  If we do, write lock it and
			 * never unlock it.
			 *
			 * This test reads h->level, so it has to happen
			 * before the page is handed back to the memory pool
			 * below; the page must not be touched after the put.
			 */
			if (LF_ISSET(S_PARENT) &&
			    (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1))
				stack = 1;

			(void)memp_fput(dbp->mpf, h, 0);

			if ((ret = __bam_lget(dbp, 1, pg,
			    LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ,
			    &lock)) != 0)
				goto err;
		}

		if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0)
			goto err;
	}
	/* NOTREACHED */

err:	BT_STK_POP(t);
	__bam_stkrel(dbp);
	return (ret);
}
/*
 * __bam_adjust --
 *	Adjust the tree after adding or deleting a record: add "adjust" to
 *	the record count of every internal-page entry on the search stack,
 *	and to the root page's record total.
 *
 * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int));
 */
int
__bam_adjust(dbp, t, adjust)
	DB *dbp;
	BTREE *t;
	int adjust;
{
	EPG *epg;
	PAGE *h;
	int ret;

	/* Walk the stack from its base up to the current entry. */
	for (epg = t->bt_sp; epg <= t->bt_csp; ++epg) {
		h = epg->page;

		/* Only internal pages carry per-entry record counts. */
		if (TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO)
			continue;

		/* Log the change before touching the page. */
		if (DB_LOGGING(dbp)) {
			ret = __bam_cadjust_log(dbp->dbenv->lg_info,
			    dbp->txn, &LSN(h), 0, dbp->log_fileid,
			    PGNO(h), &LSN(h), (u_int32_t)epg->indx,
			    (int32_t)adjust, 1);
			if (ret != 0)
				return (ret);
		}

		if (TYPE(h) == P_IBTREE)
			GET_BINTERNAL(h, epg->indx)->nrecs += adjust;
		else
			GET_RINTERNAL(h, epg->indx)->nrecs += adjust;

		/* The root page also keeps the total for the whole tree. */
		if (PGNO(h) == PGNO_ROOT)
			RE_NREC_ADJ(h, adjust);

		ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
		if (ret != 0)
			return (ret);
	}
	return (0);
}
/*
 * __bam_nrecs --
 *	Return the number of records in the tree.
 *
 *	Read-locks and fetches the root page, stores its record total in
 *	*rep, then releases the page and the lock.  Returns 0 on success or
 *	the error from the failing lock/page call; *rep is not written on
 *	failure.
 *
 * PUBLIC: int __bam_nrecs __P((DB *, db_recno_t *));
 */
int
__bam_nrecs(dbp, rep)
	DB *dbp;
	db_recno_t *rep;
{
	DB_LOCK lock;
	PAGE *h;
	db_pgno_t pgno;
	int ret;

	pgno = PGNO_ROOT;
	if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock)) != 0)
		return (ret);
	if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) {
		/*
		 * Don't leak the root lock when the page can't be fetched;
		 * same cleanup __bam_rsearch performs in this situation.
		 */
		(void)__BT_LPUT(dbp, lock);
		return (ret);
	}

	*rep = RE_NREC(h);

	(void)memp_fput(dbp->mpf, h, 0);
	(void)__BT_TLPUT(dbp, lock);

	return (0);
}
/*
 * __bam_total --
 *	Return the number of records below a page.  Aborts on a page type
 *	that is not a btree/recno leaf or internal page.
 *
 * PUBLIC: db_recno_t __bam_total __P((PAGE *));
 */
db_recno_t
__bam_total(h)
	PAGE *h;
{
	db_recno_t recs;
	db_indx_t nxt, top;

	recs = 0;
	switch (TYPE(h)) {
	case P_LBTREE:
		/* Two index entries per record on a btree leaf. */
		recs = NUM_ENT(h) / 2;
		break;
	case P_LRECNO:
		/* One index entry per record on a recno leaf. */
		recs = NUM_ENT(h);
		break;
	case P_IBTREE:
		/* Sum the per-child counts stored in each entry. */
		top = NUM_ENT(h);
		for (nxt = 0; nxt < top; ++nxt)
			recs += GET_BINTERNAL(h, nxt)->nrecs;
		break;
	case P_IRECNO:
		top = NUM_ENT(h);
		for (nxt = 0; nxt < top; ++nxt)
			recs += GET_RINTERNAL(h, nxt)->nrecs;
		break;
	default:
		abort();
	}
	return (recs);
}

335
db2/btree/bt_search.c Normal file
View File

@ -0,0 +1,335 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_search.c 10.6 (Sleepycat) 8/22/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
/*
 * __bam_search --
 *	Search a btree for a key.
 *
 * Starting at the root page, binary-search each page for the key and
 * descend through the internal pages until a leaf page has been searched
 * or, when S_PARENT is set, until the page at level "stop" is reached.
 * Pages handed back to the caller are recorded in the BTREE stack with
 * BT_STK_PUSH/BT_STK_ENTER.
 *
 * Parameters:
 *	dbp	database handle; dbp->internal is the BTREE searched.
 *	key	key to search for.
 *	flags	S_* search flags (see btree.h).
 *	stop	page level compared against h->level when S_PARENT is set.
 *	recnop	if not NULL, receives the record number of the item, but
 *		only when an exact match is found on a leaf page.
 *	exactp	set to 1 on an exact match on a leaf page and to 0 when a
 *		leaf page is searched without a match; it is not written
 *		when the search stops on an internal page.
 *
 * Returns:
 *	The value left in ret by BT_STK_ENTER when a page is returned,
 *	DB_NOTFOUND when S_EXACT is set and there is no exact match or when
 *	S_DELNO is set and only deleted items match, or the error from a
 *	failed lock or page acquisition.
 *
 * PUBLIC: int __bam_search __P((DB *,
 * PUBLIC:     const DBT *, u_int, int, db_recno_t *, int *));
 */
int
__bam_search(dbp, key, flags, stop, recnop, exactp)
	DB *dbp;
	const DBT *key;
	u_int flags;
	int stop, *exactp;
	db_recno_t *recnop;
{
	BTREE *t;
	DB_LOCK lock;
	EPG cur;
	PAGE *h;
	db_indx_t base, i, indx, lim;
	db_pgno_t pg;
	db_recno_t recno;
	int cmp, jump, ret, stack;

	t = dbp->internal;
	recno = 0;

	BT_STK_CLR(t);

	/*
	 * There are several ways we search a btree tree.  The flags argument
	 * specifies if we're acquiring read or write locks, if we position
	 * to the first or last item in a set of duplicates, if we return
	 * deleted items, and if we are locking pairs of pages.  See btree.h
	 * for more details.  In addition, if we're doing record numbers, we
	 * have to lock the entire tree regardless.
	 *
	 * If write-locking pages, we need to know whether or not to acquire a
	 * write lock on a page before getting it.  This depends on how deep it
	 * is in tree, which we don't know until we acquire the root page.  So,
	 * if we need to lock the root page we may have to upgrade it later,
	 * because we won't get the correct lock initially.
	 *
	 * Retrieve the root page.
	 */
	pg = PGNO_ROOT;

	/*
	 * "stack" is non-zero once pages are write-locked and kept on the
	 * stack; with record numbers that starts at the root for inserts
	 * and deletes.
	 */
	stack = F_ISSET(dbp, DB_BT_RECNUM) &&
	    (flags == S_INSERT || flags == S_DELETE);
	if ((ret = __bam_lget(dbp,
	    0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
		return (ret);
	if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
		(void)__BT_LPUT(dbp, lock);
		return (ret);
	}

	/* Decide if we need to save this page; if we do, write lock it. */
	if (!stack &&
	    ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
	    (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
		/* Give the page back, then re-acquire it under a write lock. */
		(void)memp_fput(dbp->mpf, h, 0);
		if ((ret = __bam_lget(dbp, 1, pg, DB_LOCK_WRITE, &lock)) != 0)
			return (ret);
		if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
			(void)__BT_LPUT(dbp, lock);
			return (ret);
		}

		stack = 1;
	}

	for (;;) {
		/*
		 * Do a binary search on the current page.  If we're searching
		 * a leaf page, we have to manipulate the indices in groups of
		 * two.  If we're searching an internal page, they're an index
		 * per page item.  If we find an exact match on a leaf page,
		 * we're done.
		 */
		cur.page = h;
		jump = TYPE(h) == P_LBTREE ? P_INDX : O_INDX;
		for (base = 0,
		    lim = NUM_ENT(h) / (db_indx_t)jump; lim != 0; lim >>= 1) {
			cur.indx = indx = base + ((lim >> 1) * jump);
			if ((cmp = __bam_cmp(dbp, key, &cur)) == 0) {
				if (TYPE(h) == P_LBTREE)
					goto match;
				goto next;
			}
			if (cmp > 0) {
				/* Continue in the half right of the probe. */
				base = indx + jump;
				--lim;
			}
		}

		/*
		 * No match found.  Base is the smallest index greater than
		 * key and may be zero or a last + O_INDX index.
		 *
		 * If it's a leaf page, return base as the "found" value.
		 * Delete only deletes exact matches.
		 */
		if (TYPE(h) == P_LBTREE) {
			*exactp = 0;

			if (LF_ISSET(S_EXACT))
				goto notfound;

			BT_STK_ENTER(t, h, base, lock, ret);
			return (ret);
		}

		/*
		 * If it's not a leaf page, record the internal page (which is
		 * a parent page for the key).  Decrement the base by 1 if it's
		 * non-zero so that if a split later occurs, the inserted page
		 * will be to the right of the saved page.
		 */
		indx = base > 0 ? base - O_INDX : base;

		/*
		 * If we're trying to calculate the record number, sum up
		 * all the record numbers on this page up to the indx point.
		 *
		 * The "next" label sits in front of the summation so that an
		 * exact match on an internal page (the "goto next" above) also
		 * counts the records in the subtrees left of the matched
		 * entry; skipping it would make the returned record number
		 * too small.
		 */
next:		if (recnop != NULL)
			for (i = 0; i < indx; ++i)
				recno += GET_BINTERNAL(h, i)->nrecs;

		pg = GET_BINTERNAL(h, indx)->pgno;
		if (stack) {
			/* Return if this is the lowest page wanted. */
			if (LF_ISSET(S_PARENT) && stop == h->level) {
				BT_STK_ENTER(t, h, indx, lock, ret);
				return (ret);
			}
			BT_STK_PUSH(t, h, indx, lock, ret);
			if (ret != 0)
				goto err;

			if ((ret =
			    __bam_lget(dbp, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
				goto err;
		} else {
			(void)memp_fput(dbp->mpf, h, 0);

			/*
			 * Decide if we want to return a pointer to the next
			 * page in the stack.  If we do, write lock it and
			 * never unlock it.
			 */
			if ((LF_ISSET(S_PARENT) &&
			    (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
			    (h->level - 1) == LEAFLEVEL)
				stack = 1;

			if ((ret =
			    __bam_lget(dbp, 1, pg, stack && LF_ISSET(S_WRITE) ?
			    DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
				goto err;
		}
		if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0)
			goto err;
	}

	/* NOTREACHED */
match:	*exactp = 1;

	/*
	 * If we're trying to calculate the record number, add in the
	 * offset on this page and correct for the fact that records
	 * in the tree are 0-based.
	 */
	if (recnop != NULL)
		*recnop = recno + (indx / P_INDX) + 1;

	/*
	 * If we got here, we know that we have a btree leaf page.
	 *
	 * If there are duplicates, go to the first/last one.
	 */
	if (LF_ISSET(S_DUPLAST))
		while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
		    h->inp[indx] == h->inp[indx + P_INDX])
			indx += P_INDX;
	else
		while (indx > 0 &&
		    h->inp[indx] == h->inp[indx - P_INDX])
			indx -= P_INDX;

	/*
	 * Now check if we are allowed to return deleted items; if not,
	 * walk back (S_DUPLAST) or forward through the duplicate set to
	 * the nearest item that is not deleted.
	 */
	if (LF_ISSET(S_DELNO)) {
		if (LF_ISSET(S_DUPLAST))
			while (GET_BKEYDATA(h, indx + O_INDX)->deleted &&
			    indx > 0 &&
			    h->inp[indx] == h->inp[indx - P_INDX])
				indx -= P_INDX;
		else
			while (GET_BKEYDATA(h, indx + O_INDX)->deleted &&
			    indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
			    h->inp[indx] == h->inp[indx + P_INDX])
				indx += P_INDX;

		/* Every item in the set is deleted. */
		if (GET_BKEYDATA(h, indx + O_INDX)->deleted)
			goto notfound;
	}

	BT_STK_ENTER(t, h, indx, lock, ret);
	return (ret);

notfound:
	(void)memp_fput(dbp->mpf, h, 0);
	(void)__BT_LPUT(dbp, lock);
	ret = DB_NOTFOUND;

	/* Release whatever has been pushed on the stack so far. */
err:	if (t->bt_csp > t->bt_sp) {
		BT_STK_POP(t);
		__bam_stkrel(dbp);
	}
	return (ret);
}
/*
 * __bam_stkrel --
 *	Release all pages currently held in the stack.
 *
 * Walks the BTREE stack from t->bt_sp through t->bt_csp inclusive.  Each
 * entry's page is returned to the memory pool with no flags (not marked
 * dirty) and its lock is put with __BT_TLPUT.  The results of the
 * individual releases are ignored; the function always returns 0.
 *
 * PUBLIC: int __bam_stkrel __P((DB *));
 */
int
__bam_stkrel(dbp)
	DB *dbp;
{
	BTREE *tree;
	EPG *entry;

	tree = dbp->internal;

	entry = tree->bt_sp;
	while (entry <= tree->bt_csp) {
		(void)memp_fput(dbp->mpf, entry->page, 0);
		(void)__BT_TLPUT(dbp, entry->lock);
		++entry;
	}
	return (0);
}
/*
 * __bam_stkgrow --
 *	Grow the stack.
 *
 * Allocates a zeroed array with twice as many EPG slots as the current
 * stack (bt_sp .. bt_esp), copies the existing entries into it and makes
 * it the new stack.  The old array is freed unless it is the built-in
 * t->bt_stack.  On return bt_csp points at the first new slot and bt_esp
 * one past the last slot.
 *
 * Returns 0 on success, or ENOMEM if the allocation fails (the stack is
 * then left untouched).
 *
 * PUBLIC: int __bam_stkgrow __P((BTREE *));
 */
int
__bam_stkgrow(t)
	BTREE *t;
{
	EPG *grown;
	size_t old_bytes, old_slots;

	old_slots = t->bt_esp - t->bt_sp;
	old_bytes = old_slots * sizeof(EPG);

	grown = (EPG *)calloc(old_slots * 2, sizeof(EPG));
	if (grown == NULL)
		return (ENOMEM);
	memcpy(grown, t->bt_sp, old_bytes);

	/* The initial stack lives inside the BTREE and must not be freed. */
	if (t->bt_sp != t->bt_stack)
		FREE(t->bt_sp, old_bytes);

	t->bt_sp = grown;
	t->bt_csp = grown + old_slots;
	t->bt_esp = grown + old_slots * 2;
	return (0);
}

952
db2/btree/bt_split.c Normal file
View File

@ -0,0 +1,952 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_split.c 10.12 (Sleepycat) 8/24/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
static int __bam_page __P((DB *, EPG *, EPG *));
static int __bam_pinsert __P((DB *, EPG *, PAGE *, PAGE *));
static int __bam_psplit __P((DB *, EPG *, PAGE *, PAGE *, int));
static int __bam_root __P((DB *, EPG *));
/*
 * __bam_split --
 *	Split a page.
 *
 * arg is the key (DB_BTREE) or a pointer to the record number (any other
 * access method) that identifies the page to split.  Returns 0 once the
 * leaf page has been split, otherwise the error from the search or from
 * the split itself.
 *
 * PUBLIC: int __bam_split __P((DB *, void *));
 */
int
__bam_split(dbp, arg)
	DB *dbp;
	void *arg;
{
	BTREE *t;
	enum { UP, DOWN } dir;
	int exact, level, ret;

	t = dbp->internal;

	/*
	 * The locking protocol we use to avoid deadlock is to acquire locks
	 * by walking down the tree, but we do it as lazily as possible,
	 * locking the root only as a last resort.  We expect all stack pages
	 * to have been discarded before we're called; we discard all
	 * short-term locks.
	 *
	 * On the first pass we only know that some leaf page was too full
	 * for an insert, and we have the key/recno that caused the problem.
	 * We search again, this time asking for the leaf page and its parent,
	 * both locked, split the leaf and see whether the new internal key
	 * fits into the parent.  If it does, we're done.
	 *
	 * If it doesn't, we drop our locks and retry one level higher (the
	 * parent and its parent), and so on, splitting the root page as the
	 * final resort.  After a successful split higher up we walk back
	 * down, one level at a time, until the leaf page has been split.
	 *
	 * XXX
	 * A traditional method of speeding this up is to maintain a stack of
	 * the pages traversed in the original search.  You can detect if the
	 * stack is correct by storing the page's LSN when it was searched and
	 * comparing that LSN with the current one when it's locked during the
	 * split.  This would be an easy change for this code, but I have no
	 * numbers that indicate it's worthwhile.
	 */
	dir = UP;
	level = LEAFLEVEL;
	for (;;) {
		/* Acquire a page and its parent, locked. */
		if (dbp->type == DB_BTREE)
			ret = __bam_search(dbp,
			    arg, S_WRPAIR, level, NULL, &exact);
		else
			ret = __bam_rsearch(dbp,
			    (db_recno_t *)arg, S_WRPAIR, level, &exact);
		if (ret != 0)
			return (ret);

		/* Split the page; the root has no parent on the stack. */
		if (t->bt_csp[0].page->pgno == PGNO_ROOT)
			ret = __bam_root(dbp, &t->bt_csp[0]);
		else
			ret = __bam_page(dbp, &t->bt_csp[-1], &t->bt_csp[0]);

		if (ret == 0) {
			/* Once we've split the leaf page, we're done. */
			if (level == LEAFLEVEL)
				return (0);

			/* A split succeeded: head back down the tree. */
			dir = DOWN;
		} else if (ret == DB_NEEDSPLIT) {
			/*
			 * It's possible to fail to split repeatedly, as other
			 * threads may be modifying the tree, or the page usage
			 * is sufficiently bad that we don't get enough space
			 * the first time.  Either way, go (back) up a level.
			 */
			dir = UP;
		} else
			return (ret);

		/* Move one level in the direction chosen above. */
		if (dir == UP)
			++level;
		else
			--level;
	}
	/* NOTREACHED */
}
/*
 * __bam_root --
 *	Split the root page of a btree.
 *
 * cp is the stack entry holding the (locked) root page.  Two new pages
 * are allocated, the root's items are distributed over them by
 * __bam_psplit, and the root page itself is then rebuilt by __ram_root
 * or __bam_broot to point at the two new pages.
 *
 * The root page and its lock are released on every path that gets past
 * the depth check.  Returns 0 on success, ENOSPC if the tree is already
 * MAXBTREELEVEL deep, or the error of the step that failed.
 */
static int
__bam_root(dbp, cp)
	DB *dbp;
	EPG *cp;
{
	BTREE *t;
	DBT image;
	DB_LSN nolsn;
	PAGE *lpage, *rpage;
	int ret;

	t = dbp->internal;

	/* Yeah, right. */
	if (cp->page->level >= MAXBTREELEVEL)
		return (ENOSPC);

	/* Create new left and right pages for the split. */
	lpage = NULL;
	rpage = NULL;
	if ((ret = __bam_new(dbp, TYPE(cp->page), &lpage)) != 0)
		goto err;
	if ((ret = __bam_new(dbp, TYPE(cp->page), &rpage)) != 0)
		goto err;

	/* Only non-internal pages are linked to their siblings. */
	P_INIT(lpage, dbp->pgsize, lpage->pgno,
	    PGNO_INVALID, ISINTERNAL(cp->page) ? PGNO_INVALID : rpage->pgno,
	    cp->page->level, TYPE(cp->page));
	P_INIT(rpage, dbp->pgsize, rpage->pgno,
	    ISINTERNAL(cp->page) ? PGNO_INVALID : lpage->pgno, PGNO_INVALID,
	    cp->page->level, TYPE(cp->page));

	/* Split the page. */
	if ((ret = __bam_psplit(dbp, cp, lpage, rpage, 1)) != 0)
		goto err;

	/* Log the change: the record carries the whole original root page. */
	if (DB_LOGGING(dbp)) {
		memset(&image, 0, sizeof(image));
		image.data = cp->page;
		image.size = dbp->pgsize;
		ZERO_LSN(nolsn);

		ret = __bam_split_log(dbp->dbenv->lg_info, dbp->txn,
		    &LSN(cp->page), 0, dbp->log_fileid, PGNO(lpage),
		    &LSN(lpage), PGNO(rpage), &LSN(rpage),
		    (u_int32_t)NUM_ENT(lpage), 0, &nolsn, &image);
		if (ret != 0)
			goto err;
		LSN(lpage) = LSN(rpage) = LSN(cp->page);
	}

	/* Clean up the new root page. */
	if (dbp->type == DB_RECNO)
		ret = __ram_root(dbp, cp->page, lpage, rpage);
	else
		ret = __bam_broot(dbp, cp->page, lpage, rpage);
	if (ret != 0)
		goto err;

	/* Success -- write the real pages back to the store. */
	(void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY);
	(void)__BT_TLPUT(dbp, cp->lock);
	(void)memp_fput(dbp->mpf, lpage, DB_MPOOL_DIRTY);
	(void)memp_fput(dbp->mpf, rpage, DB_MPOOL_DIRTY);

	++t->lstat.bt_split;
	++t->lstat.bt_rootsplit;
	return (0);

	/* Failure -- give back any new pages and release the root clean. */
err:	if (lpage != NULL)
		(void)__bam_free(dbp, lpage);
	if (rpage != NULL)
		(void)__bam_free(dbp, rpage);
	(void)memp_fput(dbp->mpf, cp->page, 0);
	(void)__BT_TLPUT(dbp, cp->lock);
	return (ret);
}
/*
 * __bam_page --
 *	Split the non-root page of a btree.
 *
 * pp is the stack entry of the parent page and cp the stack entry of the
 * page being split; both pages are pinned and locked on entry.  A new
 * right page is allocated from the database, the left half is built in a
 * malloc'd scratch page and copied over the original page only after the
 * parent insert (and logging, if enabled) has succeeded.
 *
 * On every return the parent and child pages and their locks have been
 * released, as have the following leaf page and its lock if they were
 * acquired.  Returns 0 on success, DB_NEEDSPLIT (from __bam_pinsert) if
 * the parent has no room for the new key, ENOMEM if the scratch page
 * cannot be allocated, or the error of the step that failed.
 */
static int
__bam_page(dbp, pp, cp)
	DB *dbp;
	EPG *pp, *cp;
{
	BTREE *t;
	DB_LOCK tplock;
	PAGE *lp, *rp, *tp;
	int ret;

	t = dbp->internal;
	lp = rp = tp = NULL;

	/*
	 * Create new right page for the split.
	 *
	 * A failure here must go through the common error path, like every
	 * other failure in this function and like __bam_root: the caller
	 * does not release the parent and child pages or their locks.
	 */
	if ((ret = __bam_new(dbp, TYPE(cp->page), &rp)) != 0)
		goto err;
	P_INIT(rp, dbp->pgsize, rp->pgno,
	    ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->pgno,
	    ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->next_pgno,
	    cp->page->level, TYPE(cp->page));

	/* Create new left page for the split. */
	if ((lp = (PAGE *)malloc(dbp->pgsize)) == NULL) {
		ret = ENOMEM;
		goto err;
	}
#ifdef DEBUG
	memset(lp, 0xff, dbp->pgsize);
#endif
	P_INIT(lp, dbp->pgsize, cp->page->pgno,
	    ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->prev_pgno,
	    ISINTERNAL(cp->page) ? PGNO_INVALID : rp->pgno,
	    cp->page->level, TYPE(cp->page));
	ZERO_LSN(lp->lsn);

	/*
	 * Split right.
	 *
	 * Only the indices are sorted on the page, i.e., the key/data pairs
	 * aren't, so it's simpler to copy the data from the split page onto
	 * two new pages instead of copying half the data to the right page
	 * and compacting the left page in place.  Since the left page can't
	 * change, we swap the original and the allocated left page after the
	 * split.
	 */
	if ((ret = __bam_psplit(dbp, cp, lp, rp, 0)) != 0)
		goto err;

	/*
	 * Fix up the previous pointer of any leaf page following the split
	 * page.
	 *
	 * !!!
	 * There are interesting deadlock situations here as we write-lock a
	 * page that's not in our direct ancestry.  Consider a cursor walking
	 * through the leaf pages, that has the previous page read-locked and
	 * is waiting on a lock for the page we just split.  It will deadlock
	 * here.  If this is a problem, we can fail in the split; it's not a
	 * problem as the split will succeed after the cursor passes through
	 * the page we're splitting.
	 */
	if (TYPE(cp->page) == P_LBTREE && rp->next_pgno != PGNO_INVALID) {
		if ((ret = __bam_lget(dbp,
		    0, rp->next_pgno, DB_LOCK_WRITE, &tplock)) != 0)
			goto err;
		if ((ret = __bam_pget(dbp, &tp, &rp->next_pgno, 0)) != 0) {
			/*
			 * We hold the lock but not the page.  The error path
			 * only releases tplock together with a pinned tp, so
			 * drop the lock here and make sure tp reads as "not
			 * acquired".
			 */
			tp = NULL;
			(void)__BT_TLPUT(dbp, tplock);
			goto err;
		}
	}

	/* Insert the new pages into the parent page. */
	if ((ret = __bam_pinsert(dbp, pp, lp, rp)) != 0)
		goto err;

	/* Log the change: the record carries the whole original page. */
	if (DB_LOGGING(dbp)) {
		DBT __a;
		DB_LSN __lsn;
		memset(&__a, 0, sizeof(__a));
		__a.data = cp->page;
		__a.size = dbp->pgsize;
		/* __lsn is only passed to the log call when tp is NULL. */
		if (tp == NULL)
			ZERO_LSN(__lsn);
		if ((ret = __bam_split_log(dbp->dbenv->lg_info, dbp->txn,
		    &cp->page->lsn, 0, dbp->log_fileid, PGNO(cp->page),
		    &LSN(cp->page), PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp),
		    tp == NULL ? 0 : PGNO(tp),
		    tp == NULL ? &__lsn : &LSN(tp), &__a)) != 0)
			goto err;

		LSN(lp) = LSN(rp) = LSN(cp->page);
		if (tp != NULL)
			LSN(tp) = LSN(cp->page);
	}

	/*
	 * Copy the allocated page into place: the header and index array
	 * (up to LOFFSET) and the item area (from HOFFSET to the page end).
	 */
	memcpy(cp->page, lp, LOFFSET(lp));
	memcpy((u_int8_t *)cp->page + HOFFSET(lp),
	    (u_int8_t *)lp + HOFFSET(lp), dbp->pgsize - HOFFSET(lp));
	FREE(lp, dbp->pgsize);
	lp = NULL;

	/* Finish the next-page link. */
	if (tp != NULL)
		tp->prev_pgno = rp->pgno;

	/* Success -- write the real pages back to the store. */
	(void)memp_fput(dbp->mpf, pp->page, DB_MPOOL_DIRTY);
	(void)__BT_TLPUT(dbp, pp->lock);
	(void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY);
	(void)__BT_TLPUT(dbp, cp->lock);
	(void)memp_fput(dbp->mpf, rp, DB_MPOOL_DIRTY);
	if (tp != NULL) {
		(void)memp_fput(dbp->mpf, tp, DB_MPOOL_DIRTY);
		(void)__BT_TLPUT(dbp, tplock);
	}
	return (0);

	/* Failure -- undo the allocations and release everything clean. */
err:	if (lp != NULL)
		FREE(lp, dbp->pgsize);
	if (rp != NULL)
		(void)__bam_free(dbp, rp);
	if (tp != NULL) {
		(void)memp_fput(dbp->mpf, tp, 0);
		(void)__BT_TLPUT(dbp, tplock);
	}
	(void)memp_fput(dbp->mpf, pp->page, 0);
	(void)__BT_TLPUT(dbp, pp->lock);
	(void)memp_fput(dbp->mpf, cp->page, 0);
	(void)__BT_TLPUT(dbp, cp->lock);
	return (ret);
}
/*
* __bam_broot --
* Fix up the btree root page after it has been split.
*
* PUBLIC: int __bam_broot __P((DB *, PAGE *, PAGE *, PAGE *));
*/
int
__bam_broot(dbp, rootp, lp, rp)
DB *dbp;
PAGE *rootp, *lp, *rp;
{
BINTERNAL bi, *child_bi;
BKEYDATA *child_bk;
DBT hdr, data;
int ret;
/*
* If the root page was a leaf page, change it into an internal page.
* We copy the key we split on (but not the key's data, in the case of
* a leaf page) to the new root page.
*/
P_INIT(rootp, dbp->pgsize,
PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IBTREE);
/*
* The btree comparison code guarantees that the left-most key on any
* level of the tree is never used, so it doesn't need to be filled in.
*/
bi.len = 0;
bi.deleted = 0;
bi.type = B_KEYDATA;
bi.pgno = lp->pgno;
if (F_ISSET(dbp, DB_BT_RECNUM)) {
bi.nrecs = __bam_total(lp);
RE_NREC_SET(rootp, bi.nrecs);
}
memset(&hdr, 0, sizeof(hdr));
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
memset(&data, 0, sizeof(data));
data.data = (char *) "";
data.size = 0;
if ((ret =
__db_pitem(dbp, rootp, 0, BINTERNAL_SIZE(0), &hdr, &data)) != 0)
return (ret);
switch (TYPE(rp)) {
case P_IBTREE:
/* Copy the first key of the child page onto the root page. */
child_bi = GET_BINTERNAL(rp, 0);
bi.len = child_bi->len;
bi.deleted = 0;
bi.type = child_bi->type;
bi.pgno = rp->pgno;
if (F_ISSET(dbp, DB_BT_RECNUM)) {
bi.nrecs = __bam_total(rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
data.data = child_bi->data;
data.size = child_bi->len;
if ((ret = __db_pitem(dbp, rootp, 1,
BINTERNAL_SIZE(child_bi->len), &hdr, &data)) != 0)
return (ret);
/* Increment the overflow ref count. */
if (child_bi->type == B_OVERFLOW && (ret =
__db_ioff(dbp, ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
return (ret);
break;
case P_LBTREE:
/* Copy the first key of the child page onto the root page. */
child_bk = GET_BKEYDATA(rp, 0);
switch (child_bk->type) {
case B_KEYDATA:
bi.len = child_bk->len;
bi.deleted = 0;
bi.type = child_bk->type;
bi.pgno = rp->pgno;
if (F_ISSET(dbp, DB_BT_RECNUM)) {
bi.nrecs = __bam_total(rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
data.data = child_bk->data;
data.size = child_bk->len;
if ((ret = __db_pitem(dbp, rootp, 1,
BINTERNAL_SIZE(child_bk->len), &hdr, &data)) != 0)
return (ret);
break;
case B_DUPLICATE:
case B_OVERFLOW:
bi.len = BOVERFLOW_SIZE;
bi.deleted = 0;
bi.type = child_bk->type;
bi.pgno = rp->pgno;
if (F_ISSET(dbp, DB_BT_RECNUM)) {
bi.nrecs = __bam_total(rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
data.data = child_bk;
data.size = BOVERFLOW_SIZE;
if ((ret = __db_pitem(dbp, rootp, 1,
BINTERNAL_SIZE(BOVERFLOW_SIZE), &hdr, &data)) != 0)
return (ret);
/* Increment the overflow ref count. */
if (child_bk->type == B_OVERFLOW && (ret =
__db_ioff(dbp, ((BOVERFLOW *)child_bk)->pgno)) != 0)
return (ret);
break;
default:
return (__db_pgfmt(dbp, rp->pgno));
}
break;
default:
return (__db_pgfmt(dbp, rp->pgno));
}
return (0);
}
/*
 * __ram_root --
 *	Fix up the recno root page after it has been split.
 *
 * Re-initializes rootp as an internal recno page one level above lp and
 * inserts two RINTERNAL entries: index 0 for lp and index 1 for rp, each
 * carrying the child's page number and its record total as computed by
 * __bam_total.  The root's record count is set from the left child and
 * then adjusted by the right child's count.
 *
 * Returns 0 on success or the error from __db_pitem.
 *
 * PUBLIC: int __ram_root __P((DB *, PAGE *, PAGE *, PAGE *));
 */
int
__ram_root(dbp, rootp, lp, rp)
	DB *dbp;
	PAGE *rootp, *lp, *rp;
{
	DBT item;
	RINTERNAL ref;
	int ret;

	/* Initialize the page. */
	P_INIT(rootp, dbp->pgsize,
	    PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IRECNO);

	/* Both entries are inserted from the same RINTERNAL buffer. */
	memset(&item, 0, sizeof(item));
	item.data = &ref;
	item.size = RINTERNAL_SIZE;

	/* Left child: entry 0, and the starting record count. */
	ref.pgno = lp->pgno;
	ref.nrecs = __bam_total(lp);
	ret = __db_pitem(dbp, rootp, 0, RINTERNAL_SIZE, &item, NULL);
	if (ret != 0)
		return (ret);
	RE_NREC_SET(rootp, ref.nrecs);

	/* Right child: entry 1, added to the record count. */
	ref.pgno = rp->pgno;
	ref.nrecs = __bam_total(rp);
	ret = __db_pitem(dbp, rootp, 1, RINTERNAL_SIZE, &item, NULL);
	if (ret != 0)
		return (ret);
	RE_NREC_ADJ(rootp, ref.nrecs);

	return (0);
}
/*
* __bam_pinsert --
* Insert a new key into a parent page, completing the split.
*
* Parameters:
* dbp -- database handle.
* parent -- stack entry for the parent page; the new entry is inserted at
* parent->indx + O_INDX, and the entry at parent->indx has its record
* count reduced when record numbers are maintained.
* lchild -- left page of the split; only its last key is read, for the
* prefix computation.
* rchild -- right page of the split; its first item supplies the key for
* the new parent entry.
*
* Returns 0 on success, DB_NEEDSPLIT if the parent page does not have
* enough free space for the new entry (nothing has been modified in that
* case), the result of __db_pgfmt for an unexpected page or item type,
* or the error from __db_pitem, __db_ioff or __bam_cadjust_log.
*/
static int
__bam_pinsert(dbp, parent, lchild, rchild)
DB *dbp;
EPG *parent;
PAGE *lchild, *rchild;
{
BINTERNAL bi, *child_bi;
BKEYDATA *child_bk, *tmp_bk;
BTREE *t;
DBT a, b, hdr, data;
PAGE *ppage;
RINTERNAL ri;
db_indx_t off;
db_recno_t nrecs;
u_int32_t n, nbytes, nksize;
int ret;
t = dbp->internal;
ppage = parent->page;
/* If handling record numbers, count records split to the right page. */
nrecs = dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM) ?
__bam_total(rchild) : 0;
/*
* Now we insert the new page's first key into the parent page, which
* completes the split. The parent points to a PAGE and a page index
* offset, where the new key goes ONE AFTER the index, because we split
* to the right.
*
* XXX
* Some btree algorithms replace the key for the old page as well as
* the new page. We don't, as there's no reason to believe that the
* first key on the old page is any better than the key we have, and,
* in the case of a key being placed at index 0 causing the split, the
* key is unavailable.
*/
off = parent->indx + O_INDX;
/*
* Calculate the space needed on the parent page.
*
* Prefix trees: space hack used when inserting into BINTERNAL pages.
* Retain only what's needed to distinguish between the new entry and
* the LAST entry on the page to its left. If the keys compare equal,
* retain the entire key. We ignore overflow keys, and the entire key
* must be retained for the next-to-leftmost key on the leftmost page
* of each level, or the search will fail. Applicable ONLY to internal
* pages that have leaf pages as children. Further reduction of the
* key between pairs of internal pages loses too much information.
*/
switch (TYPE(rchild)) {
case P_IBTREE:
/* The right child is an internal page: copy its first key as is. */
child_bi = GET_BINTERNAL(rchild, 0);
nbytes = BINTERNAL_PSIZE(child_bi->len);
if (P_FREESPACE(ppage) < nbytes)
return (DB_NEEDSPLIT);
/* Add a new record for the right page. */
bi.len = child_bi->len;
bi.deleted = 0;
bi.type = child_bi->type;
bi.pgno = rchild->pgno;
bi.nrecs = nrecs;
memset(&hdr, 0, sizeof(hdr));
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
memset(&data, 0, sizeof(data));
data.data = child_bi->data;
data.size = child_bi->len;
if ((ret = __db_pitem(dbp, ppage, off,
BINTERNAL_SIZE(child_bi->len), &hdr, &data)) != 0)
return (ret);
/* Increment the overflow ref count. */
if (child_bi->type == B_OVERFLOW && (ret =
__db_ioff(dbp, ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
return (ret);
break;
case P_LBTREE:
/* The right child is a leaf page: the key may be shortened. */
child_bk = GET_BKEYDATA(rchild, 0);
switch (child_bk->type) {
case B_KEYDATA:
/* Start from the full key; nksize is the key length to store. */
nbytes = BINTERNAL_PSIZE(child_bk->len);
nksize = child_bk->len;
/* No prefix function configured: keep the whole key. */
if (t->bt_prefix == NULL)
goto noprefix;
/*
* Keep the whole key when the parent has no previous page and
* the new entry goes at index 0 or 1 (see the comment above).
*/
if (ppage->prev_pgno == PGNO_INVALID && off <= 1)
goto noprefix;
/* Compare against the last key on the left child. */
tmp_bk = GET_BKEYDATA(lchild, NUM_ENT(lchild) - P_INDX);
if (tmp_bk->type != B_KEYDATA)
goto noprefix;
memset(&a, 0, sizeof(a));
a.size = tmp_bk->len;
a.data = tmp_bk->data;
memset(&b, 0, sizeof(b));
b.size = child_bk->len;
b.data = child_bk->data;
nksize = t->bt_prefix(&a, &b);
/*
* Use the shortened key only if it actually takes less space on
* the page; otherwise fall into the else branch, which is also
* the target of the "goto noprefix" jumps above, and restore the
* full key length.
*/
if ((n = BINTERNAL_PSIZE(nksize)) < nbytes) {
t->lstat.bt_pfxsaved += nbytes - n;
nbytes = n;
} else
noprefix: nksize = child_bk->len;
if (P_FREESPACE(ppage) < nbytes)
return (DB_NEEDSPLIT);
bi.len = nksize;
bi.deleted = 0;
bi.type = child_bk->type;
bi.pgno = rchild->pgno;
bi.nrecs = nrecs;
memset(&hdr, 0, sizeof(hdr));
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
memset(&data, 0, sizeof(data));
data.data = child_bk->data;
data.size = nksize;
if ((ret = __db_pitem(dbp, ppage, off,
BINTERNAL_SIZE(nksize), &hdr, &data)) != 0)
return (ret);
break;
case B_DUPLICATE:
case B_OVERFLOW:
/* The leaf item is copied whole, BOVERFLOW_SIZE bytes of it. */
nbytes = BINTERNAL_PSIZE(BOVERFLOW_SIZE);
if (P_FREESPACE(ppage) < nbytes)
return (DB_NEEDSPLIT);
bi.len = BOVERFLOW_SIZE;
bi.deleted = 0;
bi.type = child_bk->type;
bi.pgno = rchild->pgno;
bi.nrecs = nrecs;
memset(&hdr, 0, sizeof(hdr));
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
memset(&data, 0, sizeof(data));
data.data = child_bk;
data.size = BOVERFLOW_SIZE;
if ((ret = __db_pitem(dbp, ppage, off,
BINTERNAL_SIZE(BOVERFLOW_SIZE), &hdr, &data)) != 0)
return (ret);
/* Increment the overflow ref count. */
if (child_bk->type == B_OVERFLOW && (ret =
__db_ioff(dbp, ((BOVERFLOW *)child_bk)->pgno)) != 0)
return (ret);
break;
default:
return (__db_pgfmt(dbp, rchild->pgno));
}
break;
case P_IRECNO:
case P_LRECNO:
/* Recno parents hold only a page number and a record count. */
nbytes = RINTERNAL_PSIZE;
if (P_FREESPACE(ppage) < nbytes)
return (DB_NEEDSPLIT);
/* Add a new record for the right page. */
memset(&hdr, 0, sizeof(hdr));
hdr.data = &ri;
hdr.size = RINTERNAL_SIZE;
ri.pgno = rchild->pgno;
ri.nrecs = nrecs;
if ((ret = __db_pitem(dbp,
ppage, off, RINTERNAL_SIZE, &hdr, NULL)) != 0)
return (ret);
break;
default:
return (__db_pgfmt(dbp, rchild->pgno));
}
/*
* Adjust the parent page's left page record count: the records that
* moved to the right page are now counted by the new entry.
*/
if (dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) {
/* Log the change. */
if (DB_LOGGING(dbp) &&
(ret = __bam_cadjust_log(dbp->dbenv->lg_info,
dbp->txn, &LSN(ppage), 0, dbp->log_fileid,
PGNO(ppage), &LSN(ppage), (u_int32_t)parent->indx,
-(int32_t)nrecs, (int32_t)0)) != 0)
return (ret);
/* Update the left page count. */
if (dbp->type == DB_RECNO)
GET_RINTERNAL(ppage, parent->indx)->nrecs -= nrecs;
else
GET_BINTERNAL(ppage, parent->indx)->nrecs -= nrecs;
}
return (0);
}
/*
* __bam_psplit --
* Do the real work of splitting the page.
*
* Chooses a split index on cp->page, copies the items below it to lp
* and the items from it upward to rp with __bam_copy, and then calls
* __bam_ca_split to adjust the cursors.
*
* Parameters:
* dbp -- database handle.
* cp -- stack entry of the page being split; cp->page is read but not
* modified here, and cp->indx is used to detect the fast-split cases.
* lp, rp -- destination pages for the left and right halves.
* cleft -- passed through unchanged to __bam_ca_split.
*
* Returns 0 on success, the result of __db_pgfmt for an unexpected page
* type, or the error from __bam_copy.
*/
static int
__bam_psplit(dbp, cp, lp, rp, cleft)
DB *dbp;
EPG *cp;
PAGE *lp, *rp;
int cleft;
{
BTREE *t;
PAGE *pp;
db_indx_t half, nbytes, off, splitp, top;
int adjust, cnt, isbigkey, ret;
t = dbp->internal;
pp = cp->page;
/* Leaf btree pages are indexed in key/data pairs, others per item. */
adjust = TYPE(pp) == P_LBTREE ? P_INDX : O_INDX;
/*
* If we're splitting the first (last) page on a level because we're
* inserting (appending) a key to it, it's likely that the data is
* sorted. Moving a single item to the new page is less work and can
* push the fill factor higher than normal. If we're wrong it's not
* a big deal, we'll just do the split the right way next time.
*/
off = 0;
if (NEXT_PGNO(pp) == PGNO_INVALID &&
((ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page) - 1) ||
(!ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page))))
off = NUM_ENT(cp->page) - adjust;
else if (PREV_PGNO(pp) == PGNO_INVALID && cp->indx == 0)
off = adjust;
++t->lstat.bt_split;
/* A non-zero off is the fast-split point: skip the size scan. */
if (off != 0) {
++t->lstat.bt_fastsplit;
goto sort;
}
/*
* Split the data to the left and right pages. Try not to split on
* an overflow key. (Overflow keys on internal pages will slow down
* searches.) Refuse to split in the middle of a set of duplicates.
*
* First, find the optimum place to split.
*
* It's possible to try and split past the last record on the page if
* there's a very large record at the end of the page. Make sure this
* doesn't happen by bounding the check at the next-to-last entry on
* the page.
*
* Note, we try and split half the data present on the page. This is
* because another process may have already split the page and left
* it half empty. We don't try and skip the split -- we don't know
* how much space we're going to need on the page, and we may need up
* to half the page for a big item, so there's no easy test to decide
* if we need to split or not. Besides, if two threads are inserting
* data into the same place in the database, we're probably going to
* need more space soon anyway.
*/
top = NUM_ENT(pp) - adjust;
half = (dbp->pgsize - HOFFSET(pp)) / 2;
/* Accumulate item sizes until half of the used item space is passed. */
for (nbytes = 0, off = 0; off < top && nbytes < half; ++off)
switch (TYPE(pp)) {
case P_IBTREE:
if (GET_BINTERNAL(pp, off)->type == B_KEYDATA)
nbytes +=
BINTERNAL_SIZE(GET_BINTERNAL(pp, off)->len);
else
nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE);
break;
case P_LBTREE:
/* Count the key here and, after the extra ++off, its data item. */
if (GET_BKEYDATA(pp, off)->type == B_KEYDATA)
nbytes +=
BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
else
nbytes += BOVERFLOW_SIZE;
++off;
if (GET_BKEYDATA(pp, off)->type == B_KEYDATA)
nbytes +=
BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
else
nbytes += BOVERFLOW_SIZE;
break;
case P_IRECNO:
nbytes += RINTERNAL_SIZE;
break;
case P_LRECNO:
nbytes += BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
break;
default:
return (__db_pgfmt(dbp, pp->pgno));
}
sort: splitp = off;
/*
* Splitp is either at or just past the optimum split point. If
* it's a big key, try and find something close by that's not.
*/
if (TYPE(pp) == P_IBTREE)
isbigkey = GET_BINTERNAL(pp, off)->type != B_KEYDATA;
else if (TYPE(pp) == P_LBTREE)
isbigkey = GET_BKEYDATA(pp, off)->type != B_KEYDATA;
else
isbigkey = 0;
/* Look up to three entries to the right, then to the left, of splitp. */
if (isbigkey)
for (cnt = 1; cnt <= 3; ++cnt) {
off = splitp + cnt * adjust;
/*
* NOTE(review): on a P_IBTREE page whose entry at off is not
* B_KEYDATA, this condition goes on to read the same entry through
* GET_BKEYDATA, unlike the ?: form used for the left-hand probe
* below -- presumably harmless if both item layouts keep the type
* field in the same place; confirm against db_page.h.
*/
if (off < (db_indx_t)NUM_ENT(pp) &&
((TYPE(pp) == P_IBTREE &&
GET_BINTERNAL(pp, off)->type == B_KEYDATA) ||
GET_BKEYDATA(pp, off)->type == B_KEYDATA)) {
splitp = off;
break;
}
/* Not enough entries to the left to probe at this distance. */
if (splitp <= (db_indx_t)(cnt * adjust))
continue;
off = splitp - cnt * adjust;
if (TYPE(pp) == P_IBTREE ?
GET_BINTERNAL(pp, off)->type == B_KEYDATA :
GET_BKEYDATA(pp, off)->type == B_KEYDATA) {
splitp = off;
break;
}
}
/*
* We can't split in the middle of a set of duplicates. We know that
* no duplicate set can take up more than about 25% of the page,
* because that's the point where we push it off onto a duplicate
* page set. So, this loop can't be unbounded.
*
* Duplicate keys share one index value, so comparing inp[] entries
* tells whether two slots hold the same key.
*
* NOTE(review): the test below reads pp->inp[splitp - adjust] and so
* presumably relies on splitp >= adjust at this point -- confirm.
*/
if (F_ISSET(dbp, DB_AM_DUP) && TYPE(pp) == P_LBTREE &&
pp->inp[splitp] == pp->inp[splitp - adjust])
for (cnt = 1;; ++cnt) {
/* First key to the right that differs: split in front of it. */
off = splitp + cnt * adjust;
if (off < NUM_ENT(pp) &&
pp->inp[splitp] != pp->inp[off]) {
splitp = off;
break;
}
if (splitp <= (db_indx_t)(cnt * adjust))
continue;
/* First key to the left that differs: split just after it. */
off = splitp - cnt * adjust;
if (pp->inp[splitp] != pp->inp[off]) {
splitp = off + adjust;
break;
}
}
/* We're going to split at splitp. */
if ((ret = __bam_copy(dbp, pp, lp, 0, splitp)) != 0)
return (ret);
if ((ret = __bam_copy(dbp, pp, rp, splitp, NUM_ENT(pp))) != 0)
return (ret);
/* Adjust the cursors. */
__bam_ca_split(dbp, pp->pgno, lp->pgno, rp->pgno, splitp, cleft);
return (0);
}
/*
 * __bam_copy --
 *	Copy a set of records from one page to another.
 *
 * Copies the items at indices [nxt, stop) of page pp onto page cp, where
 * they are stored at indices 0, 1, 2, ... in the same order.  For every
 * copied index the entry count of cp is incremented; item bytes are
 * placed by lowering cp's HOFFSET.  On a leaf btree page a key that
 * shares its index value with the preceding key (a duplicate) is not
 * copied again: the target index simply reuses the offset of the
 * previously copied key.
 *
 * Returns 0 on success, or the result of __db_pgfmt if pp has an
 * unexpected page type.
 *
 * PUBLIC: int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t));
 */
int
__bam_copy(dbp, pp, cp, nxt, stop)
	DB *dbp;
	PAGE *pp, *cp;
	u_int32_t nxt, stop;
{
	db_indx_t len, slot;
	int shared_key;

	/*
	 * Nxt is the next index to read on the source page, slot the next
	 * index to fill on the target page.
	 */
	slot = 0;
	while (nxt < stop) {
		shared_key = 0;
		len = 0;

		/* Work out how many bytes the item occupies. */
		switch (TYPE(pp)) {
		case P_IBTREE:
			if (GET_BINTERNAL(pp, nxt)->type == B_KEYDATA)
				len =
				    BINTERNAL_SIZE(GET_BINTERNAL(pp, nxt)->len);
			else
				len = BINTERNAL_SIZE(BOVERFLOW_SIZE);
			break;
		case P_LBTREE:
			/*
			 * If we're on a key and it's a duplicate, just copy
			 * the offset.
			 */
			if (slot != 0 && (nxt % P_INDX) == 0 &&
			    pp->inp[nxt] == pp->inp[nxt - P_INDX]) {
				shared_key = 1;
				break;
			}
			/* FALLTHROUGH */
		case P_LRECNO:
			if (GET_BKEYDATA(pp, nxt)->type == B_KEYDATA)
				len =
				    BKEYDATA_SIZE(GET_BKEYDATA(pp, nxt)->len);
			else
				len = BOVERFLOW_SIZE;
			break;
		case P_IRECNO:
			len = RINTERNAL_SIZE;
			break;
		default:
			return (__db_pgfmt(dbp, pp->pgno));
		}

		if (shared_key)
			cp->inp[slot] = cp->inp[slot - P_INDX];
		else {
			/* Carve the space off the item area, then fill it. */
			HOFFSET(cp) -= len;
			cp->inp[slot] = HOFFSET(cp);
			memcpy(P_ENTRY(cp, slot), P_ENTRY(pp, nxt), len);
		}

		++nxt;
		++NUM_ENT(cp);
		++slot;
	}
	return (0);
}

257
db2/btree/bt_stat.c Normal file
View File

@ -0,0 +1,257 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_stat.c 10.11 (Sleepycat) 8/19/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
static void __bam_add_rstat __P((DB_BTREE_LSTAT *, DB_BTREE_STAT *));
/*
 * __bam_stat --
 *	Gather/print the btree statistics
 *
 *	argdbp:    handle the statistics are gathered for.
 *	spp:       where the address of the allocated DB_BTREE_STAT is
 *		   stored on success; if NULL the call returns 0 at once.
 *	db_malloc: allocator for the returned structure, malloc(3) is used
 *		   when it is NULL.
 *	flags:     checked by __db_statchk(); DB_RECORDCOUNT requests only
 *		   the record count stored on the root page.
 *
 *	Returns 0 on success, otherwise the error from the failing call.
 *
 *	NOTE(review): the statistics structure is not released on the error
 *	paths; nothing visible here says how memory obtained from a caller
 *	supplied db_malloc should be freed.
 *
 * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), int));
 */
int
__bam_stat(argdbp, spp, db_malloc, flags)
	DB *argdbp;
	void *spp;
	void *(*db_malloc) __P((size_t));
	int flags;
{
	BTMETA *meta;
	BTREE *t;
	DB *dbp;
	DB_BTREE_STAT *sp;
	DB_LOCK lock;
	PAGE *h;
	db_pgno_t lastpgno, pgno;
	int ret;

	DEBUG_LWRITE(argdbp, NULL, "bam_stat", NULL, NULL, flags);

	/* Check for invalid flags. */
	if ((ret = __db_statchk(argdbp, flags)) != 0)
		return (ret);

	if (spp == NULL)
		return (0);

	GETHANDLE(argdbp, NULL, &dbp, ret);
	t = dbp->internal;

	/* Allocate and clear the structure. */
	if ((sp = db_malloc == NULL ?
	    (DB_BTREE_STAT *)malloc(sizeof(*sp)) :
	    (DB_BTREE_STAT *)db_malloc(sizeof(*sp))) == NULL) {
		ret = ENOMEM;
		goto err;
	}
	memset(sp, 0, sizeof(*sp));

	/* If the app just wants the record count, make it fast. */
	if (LF_ISSET(DB_RECORDCOUNT)) {
		pgno = PGNO_ROOT;
		if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock)) != 0)
			goto err;
		if ((ret = __bam_pget(dbp, (PAGE **)&h, &pgno, 0)) != 0) {
			/* Don't leave the root page locked on failure. */
			(void)__BT_LPUT(dbp, lock);
			goto err;
		}

		sp->bt_nrecs = RE_NREC(h);

		(void)memp_fput(dbp->mpf, h, 0);
		(void)__BT_LPUT(dbp, lock);
		goto done;
	}

	/* Get the meta-data page. */
	pgno = PGNO_METADATA;
	if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock)) != 0)
		goto err;
	if ((ret = __bam_pget(dbp, (PAGE **)&meta, &pgno, 0)) != 0) {
		/* Don't leave the meta-data page locked on failure. */
		(void)__BT_TLPUT(dbp, lock);
		goto err;
	}

	/* Translate the metadata flags. */
	if (F_ISSET(meta, BTM_DUP))
		sp->bt_flags |= DB_DUP;
	if (F_ISSET(meta, BTM_FIXEDLEN))
		sp->bt_flags |= DB_FIXEDLEN;
	if (F_ISSET(meta, BTM_RECNUM))
		sp->bt_flags |= DB_RECNUM;
	if (F_ISSET(meta, BTM_RENUMBER))
		sp->bt_flags |= DB_RENUMBER;

	/*
	 * Get the maxkey, minkey, re_len and re_pad fields from the
	 * metadata.
	 */
	sp->bt_minkey = meta->minkey;
	sp->bt_maxkey = meta->maxkey;
	sp->bt_re_len = meta->re_len;
	sp->bt_re_pad = meta->re_pad;

	/* Get the page size from the DB. */
	sp->bt_pagesize = dbp->pgsize;

	/* Initialize counters with the meta-data page information. */
	__bam_add_rstat(&meta->stat, sp);

	/*
	 * Add in the local information from this handle.
	 *
	 * !!!
	 * This is a bit odd, but it gets us closer to the truth.
	 */
	__bam_add_rstat(&t->lstat, sp);

	/* Walk the free list, counting pages. */
	for (sp->bt_free = 0, pgno = meta->free; pgno != PGNO_INVALID;) {
		++sp->bt_free;

		if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) {
			(void)memp_fput(dbp->mpf, meta, 0);
			(void)__BT_TLPUT(dbp, lock);
			goto err;
		}
		pgno = h->next_pgno;
		(void)memp_fput(dbp->mpf, h, 0);
	}

	/* Discard the meta-data page. */
	(void)memp_fput(dbp->mpf, meta, 0);
	(void)__BT_TLPUT(dbp, lock);

	/*
	 * Determine the last page of the database.  This is done before
	 * the root page is fetched: both fetches store into h, and doing
	 * it afterwards would lose the pinned root page and start the walk
	 * below on a page that has already been released.
	 */
	if ((ret = memp_fget(dbp->mpf, &lastpgno, DB_MPOOL_LAST, &h)) != 0)
		goto err;
	(void)memp_fput(dbp->mpf, h, 0);

	/* Get the root page. */
	pgno = PGNO_ROOT;
	if ((ret = __bam_lget(dbp, 0, PGNO_ROOT, DB_LOCK_READ, &lock)) != 0)
		goto err;
	if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) {
		(void)__BT_LPUT(dbp, lock);
		goto err;
	}

	/* Get the levels from the root page. */
	sp->bt_levels = h->level;

	/* Walk the pages from the root to the last one, counting things. */
	for (;;) {
		switch (TYPE(h)) {
		case P_INVALID:
			break;
		case P_IBTREE:
		case P_IRECNO:
			++sp->bt_int_pg;
			sp->bt_int_pgfree += HOFFSET(h) - LOFFSET(h);
			break;
		case P_LBTREE:
			++sp->bt_leaf_pg;
			sp->bt_leaf_pgfree += HOFFSET(h) - LOFFSET(h);
			sp->bt_nrecs += NUM_ENT(h) / P_INDX;
			break;
		case P_LRECNO:
			++sp->bt_leaf_pg;
			sp->bt_leaf_pgfree += HOFFSET(h) - LOFFSET(h);
			sp->bt_nrecs += NUM_ENT(h);
			break;
		case P_DUPLICATE:
			++sp->bt_dup_pg;
			/* XXX MARGO: sp->bt_dup_pgfree; */
			break;
		case P_OVERFLOW:
			++sp->bt_over_pg;
			/* XXX MARGO: sp->bt_over_pgfree; */
			break;
		default:
			(void)memp_fput(dbp->mpf, h, 0);
			(void)__BT_LPUT(dbp, lock);
			/* Leave through err so the handle is put back. */
			ret = __db_pgfmt(dbp, pgno);
			goto err;
		}

		(void)memp_fput(dbp->mpf, h, 0);
		(void)__BT_LPUT(dbp, lock);

		if (++pgno > lastpgno)
			break;
		/*
		 * A failure to lock or fetch the next page ends the walk;
		 * the counts gathered so far are still returned.
		 */
		if (__bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock))
			break;
		if (memp_fget(dbp->mpf, &pgno, 0, &h) != 0) {
			(void)__BT_LPUT(dbp, lock);
			break;
		}
	}

done:	*(DB_BTREE_STAT **)spp = sp;
	ret = 0;

err:	PUTHANDLE(dbp);
	return (ret);
}
/*
 * __bam_add_mstat --
 *	Add the local statistics to the meta-data page statistics.
 *
 *	Every counter of "from" is added to the counter of the same name
 *	in "to"; "from" is left unchanged.
 *
 * PUBLIC: void __bam_add_mstat __P((DB_BTREE_LSTAT *, DB_BTREE_LSTAT *));
 */
void
__bam_add_mstat(from, to)
	DB_BTREE_LSTAT *from;
	DB_BTREE_LSTAT *to;
{
	/* Accumulate one same-named counter. */
#define	BAM_MSTAT_ADD(field)	(to->field += from->field)
	BAM_MSTAT_ADD(bt_freed);
	BAM_MSTAT_ADD(bt_pfxsaved);
	BAM_MSTAT_ADD(bt_split);
	BAM_MSTAT_ADD(bt_rootsplit);
	BAM_MSTAT_ADD(bt_fastsplit);
	BAM_MSTAT_ADD(bt_added);
	BAM_MSTAT_ADD(bt_deleted);
	BAM_MSTAT_ADD(bt_get);
	BAM_MSTAT_ADD(bt_cache_hit);
	BAM_MSTAT_ADD(bt_cache_miss);
#undef	BAM_MSTAT_ADD
}
/*
 * __bam_add_rstat --
 *	Add the local statistics to the returned statistics.
 *
 *	Every counter of "from" is added to the counter of the same name
 *	in the DB_BTREE_STAT "to"; "from" is left unchanged.
 */
static void
__bam_add_rstat(from, to)
	DB_BTREE_LSTAT *from;
	DB_BTREE_STAT *to;
{
	/* Accumulate one same-named counter. */
#define	BAM_RSTAT_ADD(field)	(to->field += from->field)
	BAM_RSTAT_ADD(bt_freed);
	BAM_RSTAT_ADD(bt_pfxsaved);
	BAM_RSTAT_ADD(bt_split);
	BAM_RSTAT_ADD(bt_rootsplit);
	BAM_RSTAT_ADD(bt_fastsplit);
	BAM_RSTAT_ADD(bt_added);
	BAM_RSTAT_ADD(bt_deleted);
	BAM_RSTAT_ADD(bt_get);
	BAM_RSTAT_ADD(bt_cache_hit);
	BAM_RSTAT_ADD(bt_cache_miss);
#undef	BAM_RSTAT_ADD
}

137
db2/btree/btree.src Normal file
View File

@ -0,0 +1,137 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)btree.src 10.3 (Sleepycat) 8/17/97";
#endif /* not lint */
PREFIX bam
/*
* BTREE-pg_alloc: used to record allocating a new page.
*
* meta_lsn: the meta-data page's original lsn.
* page_lsn: the allocated page's original lsn.
* pgno: the page allocated.
* next: the next page on the free list.
*/
BEGIN pg_alloc
ARG fileid u_int32_t lu
POINTER meta_lsn DB_LSN * lu
POINTER page_lsn DB_LSN * lu
ARG pgno db_pgno_t lu
ARG ptype u_int32_t lu
ARG next db_pgno_t lu
END
/*
* BTREE-pg_free: used to record freeing a page.
*
* pgno: the page being freed.
* meta_lsn: the meta-data page's original lsn.
* header: the header from the freed page.
* next: the previous next pointer on the metadata page.
*/
BEGIN pg_free
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
POINTER meta_lsn DB_LSN * lu
DBT header DBT s
ARG next db_pgno_t lu
END
/*
* BTREE-split: used to log a page split.
*
* left: the page number for the low-order contents.
* llsn: the left page's original LSN.
* right: the page number for the high-order contents.
* rlsn: the right page's original LSN.
* indx: the number of entries that went to the left page.
* npgno: the next page number
* nlsn: the next page's original LSN (or 0 if no next page).
* pg: the split page's contents before the split.
*/
BEGIN split
ARG fileid u_int32_t lu
ARG left db_pgno_t lu
POINTER llsn DB_LSN * lu
ARG right db_pgno_t lu
POINTER rlsn DB_LSN * lu
ARG indx u_int32_t lu
ARG npgno db_pgno_t lu
POINTER nlsn DB_LSN * lu
DBT pg DBT s
END
/*
* BTREE-rsplit: used to log a reverse-split
*
* pgno: the page number of the page copied over the root.
* pgdbt: the page being copied on the root page.
* rootent: last entry on the root page.
* rootlsn: the root page's original lsn.
*/
BEGIN rsplit
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
DBT pgdbt DBT s
DBT rootent DBT s
POINTER rootlsn DB_LSN * lu
END
/*
* BTREE-adj: used to log the adjustment of an index.
*
* pgno: the page modified.
* lsn: the page's original lsn.
* indx: the index adjusted.
* indx_copy: the index to copy if inserting.
* is_insert: 0 if a delete, 1 if an insert.
*/
BEGIN adj
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
ARG indx_copy u_int32_t lu
ARG is_insert u_int32_t lu
END
/*
* BTREE-cadjust: used to adjust the count change in an internal page.
*
* pgno: the page modified.
* lsn: the page's original lsn.
* indx: the index to be adjusted.
* adjust: the signed adjustment.
* total: if the total tree entries count should be adjusted
*/
BEGIN cadjust
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
ARG adjust int32_t ld
ARG total int32_t ld
END
/*
* BTREE-cdel: used to log the intent-to-delete of a cursor record.
*
* pgno: the page modified.
* lsn: the page's original lsn.
* indx: the index to be deleted.
*/
BEGIN cdel
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
END

1279
db2/btree/btree_auto.c Normal file

File diff suppressed because it is too large Load Diff

48
db2/clib/getlong.c Normal file
View File

@ -0,0 +1,48 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)getlong.c 10.2 (Sleepycat) 5/1/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#endif
#include "db.h"
#include "clib_ext.h"
/*
 * get_long --
 *	Return a long value inside of basic parameters.
 *
 *	The string p is converted as a base-10 number.  The program exits
 *	with status 1 (through err/errx) if the value overflows a long, if
 *	p is empty or has trailing non-numeric characters, or if the value
 *	lies outside [min, max].  Otherwise the value is stored in *storep.
 *
 * PUBLIC: void get_long __P((char *, long, long, long *));
 */
void
get_long(p, min, max, storep)
	char *p;
	long min, max, *storep;
{
	char *tail;
	long parsed;

	/* strtol only reports overflow through errno, so clear it first. */
	errno = 0;
	parsed = strtol(p, &tail, 10);

	if (errno == ERANGE && (parsed == LONG_MAX || parsed == LONG_MIN))
		err(1, "%s", p);

	/* Reject the empty string and anything left over after the digits. */
	if (*p == '\0' || *tail != '\0')
		errx(1, "%s: Invalid numeric argument", p);

	if (parsed < min)
		errx(1, "%s: Less than minimum value (%ld)", p, min);
	if (parsed > max)
		errx(1, "%s: Greater than maximum value (%ld)", p, max);

	*storep = parsed;
}

787
db2/common/db_appinit.c Normal file
View File

@ -0,0 +1,787 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_appinit.c 10.27 (Sleepycat) 8/23/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/param.h>
#include <sys/stat.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "shqueue.h"
#include "db_page.h"
#include "btree.h"
#include "hash.h"
#include "log.h"
#include "txn.h"
#include "clib_ext.h"
#include "common_ext.h"
static int __db_home __P((DB_ENV *, const char *, int));
static int __db_parse __P((DB_ENV *, char *));
static int __db_tmp_dir __P((DB_ENV *, int));
static int __db_tmp_open __P((DB_ENV *, char *, int *));
/*
 * db_version --
 *	Return version information.
 *
 *	Each of majverp, minverp and patchp may be NULL; the non-NULL ones
 *	receive the corresponding DB_VERSION_* number.  The return value is
 *	DB_VERSION_STRING.
 */
const char *
db_version(majverp, minverp, patchp)
	int *majverp, *minverp, *patchp;
{
	if (majverp)
		*majverp = DB_VERSION_MAJOR;

	if (minverp)
		*minverp = DB_VERSION_MINOR;

	if (patchp)
		*patchp = DB_VERSION_PATCH;

	return (DB_VERSION_STRING);
}
/*
 * db_appinit --
 *	Initialize the application environment.
 *
 *	db_home:   suggested database home, may be NULL (see __db_home).
 *	db_config: NULL, or a NULL-terminated array of "NAME VALUE" strings.
 *	dbenv:     environment to fill in; NULL is rejected with EINVAL.
 *	flags:     any of OKFLAGS below.  DB_RECOVER and DB_RECOVER_FATAL
 *		   additionally require all of RECOVERY_FLAGS.
 *
 *	After the config array, the file DB_HOME/DB_CONFIG is parsed line
 *	by line if it can be opened.  Returns 0 on success; on failure every
 *	subsystem opened so far is closed, db_appexit() is run on dbenv and
 *	the error is returned.
 */
int
db_appinit(db_home, db_config, dbenv, flags)
	const char *db_home;
	char * const *db_config;
	DB_ENV *dbenv;
	int flags;
{
	FILE *fp;
	int i_lock, i_log, i_mpool, i_txn, ret;
	char *lp, **p, buf[MAXPATHLEN * 2];

	/* Validate arguments. */
	if (dbenv == NULL)
		return (EINVAL);
#ifdef HAVE_SPINLOCKS
#define	OKFLAGS								\
   (DB_CREATE | DB_NOMMAP | DB_THREAD | DB_INIT_LOCK | DB_INIT_LOG |	\
    DB_INIT_MPOOL | DB_INIT_TXN | DB_MPOOL_PRIVATE | DB_RECOVER |	\
    DB_RECOVER_FATAL | DB_TXN_NOSYNC | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
#else
#define	OKFLAGS								\
   (DB_CREATE | DB_NOMMAP | DB_INIT_LOCK | DB_INIT_LOG |		\
    DB_INIT_MPOOL | DB_INIT_TXN | DB_MPOOL_PRIVATE | DB_RECOVER |	\
    DB_RECOVER_FATAL | DB_TXN_NOSYNC | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
#endif
	if ((ret = __db_fchk(dbenv, "db_appinit", flags, OKFLAGS)) != 0)
		return (ret);

#define	RECOVERY_FLAGS (DB_CREATE | DB_INIT_TXN | DB_INIT_LOG)
	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) &&
	    LF_ISSET(RECOVERY_FLAGS) != RECOVERY_FLAGS)
		return (__db_ferr(dbenv, "db_appinit", 1));

	fp = NULL;
	i_lock = i_log = i_mpool = i_txn = 0;

	/* Set the database home. */
	if ((ret = __db_home(dbenv, db_home, flags)) != 0)
		goto err;

	/* Parse the config array. */
	for (p = (char **)db_config; p != NULL && *p != NULL; ++p)
		if ((ret = __db_parse(dbenv, *p)) != 0)
			goto err;

	/* Parse the config file. */
	if (dbenv->db_home != NULL) {
		(void)snprintf(buf,
		    sizeof(buf), "%s/DB_CONFIG", dbenv->db_home);
		if ((fp = fopen(buf, "r")) != NULL) {
			while (fgets(buf, sizeof(buf), fp) != NULL) {
				if ((lp = strchr(buf, '\n')) != NULL)
					*lp = '\0';
				if ((ret = __db_parse(dbenv, buf)) != 0)
					goto err;
			}
			(void)fclose(fp);
			/*
			 * The error path closes fp when it is non-NULL;
			 * forget the stream so it isn't closed twice.
			 */
			fp = NULL;
		}
	}

	/* Set up the tmp directory path. */
	if (dbenv->db_tmp_dir == NULL &&
	    (ret = __db_tmp_dir(dbenv, flags)) != 0)
		goto err;

	/* Indicate that the path names have been set. */
	F_SET(dbenv, DB_APP_INIT);

	/*
	 * If we are doing recovery, remove all the regions.
	 */
	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) {
		/* Remove all the old shared memory regions. */
		if ((ret = log_unlink(NULL, 1 /* force */, dbenv)) != 0)
			goto err;
		if ((ret = memp_unlink(NULL, 1 /* force */, dbenv)) != 0)
			goto err;
		if ((ret = lock_unlink(NULL, 1 /* force */, dbenv)) != 0)
			goto err;
		if ((ret = txn_unlink(NULL, 1 /* force */, dbenv)) != 0)
			goto err;
	}

	/* Transactions imply logging. */
	if (LF_ISSET(DB_INIT_TXN))
		LF_SET(DB_INIT_LOG);

	/* Default permissions are 0660. */
#undef	DB_DEFPERM
#define	DB_DEFPERM	(S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)

	/* Initialize the subsystems. */
	if (LF_ISSET(DB_INIT_LOCK)) {
		if ((ret = lock_open(NULL,
		    LF_ISSET(DB_CREATE | DB_THREAD),
		    DB_DEFPERM, dbenv, &dbenv->lk_info)) != 0)
			goto err;
		i_lock = 1;
	}
	if (LF_ISSET(DB_INIT_LOG)) {
		if ((ret = log_open(NULL,
		    LF_ISSET(DB_CREATE | DB_THREAD),
		    DB_DEFPERM, dbenv, &dbenv->lg_info)) != 0)
			goto err;
		i_log = 1;
	}
	if (LF_ISSET(DB_INIT_MPOOL)) {
		if ((ret = memp_open(NULL,
		    LF_ISSET(DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP | DB_THREAD),
		    DB_DEFPERM, dbenv, &dbenv->mp_info)) != 0)
			goto err;
		i_mpool = 1;
	}
	if (LF_ISSET(DB_INIT_TXN)) {
		if ((ret = txn_open(NULL,
		    LF_ISSET(DB_CREATE | DB_THREAD | DB_TXN_NOSYNC),
		    DB_DEFPERM, dbenv, &dbenv->tx_info)) != 0)
			goto err;
		i_txn = 1;
	}

	/* Initialize recovery. */
	if (LF_ISSET(DB_INIT_TXN)) {
		if ((ret = __bam_init_recover(dbenv)) != 0)
			goto err;
		if ((ret = __db_init_recover(dbenv)) != 0)
			goto err;
		if ((ret = __ham_init_recover(dbenv)) != 0)
			goto err;
		if ((ret = __log_init_recover(dbenv)) != 0)
			goto err;
		if ((ret = __txn_init_recover(dbenv)) != 0)
			goto err;
	}

	/* Now run recovery if necessary. */
	if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && (ret =
	    __db_apprec(dbenv, LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))) != 0)
		goto err;

	return (ret);

err:	if (fp != NULL)
		(void)fclose(fp);

	/*
	 * db_appexit() below closes every subsystem whose handle is still
	 * non-NULL, so clear each handle closed here to keep it from being
	 * closed a second time.
	 */
	if (i_lock) {
		(void)lock_close(dbenv->lk_info);
		dbenv->lk_info = NULL;
	}
	if (i_log) {
		(void)log_close(dbenv->lg_info);
		dbenv->lg_info = NULL;
	}
	if (i_mpool) {
		(void)memp_close(dbenv->mp_info);
		dbenv->mp_info = NULL;
	}
	if (i_txn) {
		(void)txn_close(dbenv->tx_info);
		dbenv->tx_info = NULL;
	}

	(void)db_appexit(dbenv);
	return (ret);
}
/*
* db_appexit --
* Close down the default application environment.
*/
int
db_appexit(dbenv)
DB_ENV *dbenv;
{
int ret, t_ret;
char **p;
ret = 0;
/* Close subsystems. */
if (dbenv->tx_info && (t_ret = txn_close(dbenv->tx_info)) != 0)
if (ret == 0)
ret = t_ret;
if (dbenv->mp_info && (t_ret = memp_close(dbenv->mp_info)) != 0)
if (ret == 0)
ret = t_ret;
if (dbenv->lg_info && (t_ret = log_close(dbenv->lg_info)) != 0)
if (ret == 0)
ret = t_ret;
if (dbenv->lk_info && (t_ret = lock_close(dbenv->lk_info)) != 0)
if (ret == 0)
ret = t_ret;
/* Free allocated memory. */
if (dbenv->db_home != NULL)
FREES(dbenv->db_home);
if ((p = dbenv->db_data_dir) != NULL) {
for (; *p != NULL; ++p)
FREES(*p);
FREE(dbenv->db_data_dir, dbenv->data_cnt * sizeof(char **));
}
if (dbenv->db_log_dir != NULL)
FREES(dbenv->db_log_dir);
if (dbenv->db_tmp_dir != NULL)
FREES(dbenv->db_tmp_dir);
return (ret);
}
/*
* DB_ADDSTR --
* Append the path component str to the path being built; a NULL str
* is ignored.
*
* The macro is not self-contained: it reads and writes the variables
* p (current write position), start (beginning of the buffer), slash
* (non-zero when a separator must precede the next component) and len
* (scratch) of the function it is expanded in.  An absolute component
* discards everything built so far.  No bounds checking is done and no
* terminating nul is written.
*
* NOTE(review): p[-1] is read after the copy; for an empty str appended
* while p == start that is one byte before the buffer -- confirm callers
* never pass an empty component first.
*/
#define DB_ADDSTR(str) { \
if ((str) != NULL) { \
/* If leading slash, start over. */ \
if (__db_abspath(str)) { \
p = start; \
slash = 0; \
} \
/* Append to the current string. */ \
len = strlen(str); \
if (slash) \
*p++ = PATH_SEPARATOR[0]; \
memcpy(p, str, len); \
p += len; \
slash = strchr(PATH_SEPARATOR, p[-1]) == NULL; \
} \
}
/*
 * __db_appname --
 *	Given an optional DB environment, directory and file name and type
 *	of call, build a path based on the db_appinit(3) rules, and return
 *	it in allocated space.
 *
 *	dbenv:   environment, may be NULL.
 *	appname: kind of file being named (DB_APP_NONE, DB_APP_DATA,
 *		 DB_APP_LOG or DB_APP_TMP).
 *	dir:     optional directory; must be NULL for DB_APP_DATA and
 *		 DB_APP_TMP.
 *	file:    optional file name; must be NULL for DB_APP_TMP.
 *	fdp:     if not NULL, preset to -1 and filled in when a temporary
 *		 file is created.
 *	namep:   if not NULL, preset to NULL; receives the allocated path
 *		 unless a temporary file is created, in which case only
 *		 the descriptor is returned.
 *
 *	Returns 0 on success, EINVAL for an illegal dir/file combination,
 *	ENOMEM if memory runs out, or the error from creating the temporary
 *	file.
 *
 * PUBLIC: int __db_appname __P((DB_ENV *,
 * PUBLIC:    APPNAME, const char *, const char *, int *, char **));
 */
int
__db_appname(dbenv, appname, dir, file, fdp, namep)
	DB_ENV *dbenv;
	APPNAME appname;
	const char *dir, *file;
	int *fdp;
	char **namep;
{
	DB_ENV etmp;
	size_t len;
	int ret, slash, tmp_create, tmp_free;
	const char *a, *b, *c;
	int data_entry;
	char *p, *start;

	a = b = c = NULL;
	data_entry = -1;
	tmp_create = tmp_free = 0;

	/*
	 * We don't return a name when creating temporary files, just an fd.
	 * Default to error now.
	 */
	if (fdp != NULL)
		*fdp = -1;
	if (namep != NULL)
		*namep = NULL;

	/*
	 * Absolute path names are never modified.  If the file is an absolute
	 * path, we're done.  If the directory is, simply append the file and
	 * return.
	 */
	if (file != NULL && __db_abspath(file))
		return ((*namep = (char *)strdup(file)) == NULL ? ENOMEM : 0);
	if (dir != NULL && __db_abspath(dir)) {
		a = dir;
		goto done;
	}

	/*
	 * DB_ENV  DIR	   APPNAME	   RESULT
	 * -------------------------------------------
	 * null	   null	   none		   <tmp>/file
	 * null	   set	   none		   DIR/file
	 * set	   null	   none		   DB_HOME/file
	 * set	   set	   none		   DB_HOME/DIR/file
	 *
	 * DB_ENV  FILE	   APPNAME	   RESULT
	 * -------------------------------------------
	 * null	   null	   DB_APP_DATA	   <tmp>/<create>
	 * null	   set	   DB_APP_DATA	   ./file
	 * set	   null	   DB_APP_DATA	   <tmp>/<create>
	 * set	   set	   DB_APP_DATA	   DB_HOME/DB_DATA_DIR/file
	 *
	 * DB_ENV  DIR	   APPNAME	   RESULT
	 * -------------------------------------------
	 * null	   null	   DB_APP_LOG	   <tmp>/file
	 * null	   set	   DB_APP_LOG	   DIR/file
	 * set	   null	   DB_APP_LOG	   DB_HOME/DB_LOG_DIR/file
	 * set	   set	   DB_APP_LOG	   DB_HOME/DB_LOG_DIR/DIR/file
	 *
	 * DB_ENV	   APPNAME	   RESULT
	 * -------------------------------------------
	 * null		   DB_APP_TMP	   <tmp>/<create>
	 * set		   DB_APP_TMP	   DB_HOME/DB_TMP_DIR/<create>
	 */
retry:	switch (appname) {
	case DB_APP_NONE:
		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
			if (dir == NULL)
				goto tmp;
			a = dir;
		} else {
			a = dbenv->db_home;
			b = dir;
		}
		break;
	case DB_APP_DATA:
		if (dir != NULL) {
			__db_err(dbenv,
			    "DB_APP_DATA: illegal directory specification");
			return (EINVAL);
		}

		if (file == NULL) {
			tmp_create = 1;
			goto tmp;
		}
		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT))
			a = PATH_DOT;
		else {
			a = dbenv->db_home;
			/*
			 * Step to the next data directory on every pass;
			 * once the list is exhausted fall back to the first
			 * entry and stop checking for existence.
			 */
			if (dbenv->db_data_dir != NULL &&
			    (b = dbenv->db_data_dir[++data_entry]) == NULL) {
				data_entry = -1;
				b = dbenv->db_data_dir[0];
			}
		}
		break;
	case DB_APP_LOG:
		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
			if (dir == NULL)
				goto tmp;
			a = dir;
		} else {
			a = dbenv->db_home;
			b = dbenv->db_log_dir;
			c = dir;
		}
		break;
	case DB_APP_TMP:
		if (dir != NULL || file != NULL) {
			__db_err(dbenv,
		    "DB_APP_TMP: illegal directory or file specification");
			return (EINVAL);
		}

		tmp_create = 1;
		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT))
			goto tmp;
		else {
			a = dbenv->db_home;
			b = dbenv->db_tmp_dir;
		}
		break;
	}

	/* Reference a file from the appropriate temporary directory. */
	if (0) {
tmp:		if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
			memset(&etmp, 0, sizeof(etmp));
			if ((ret = __db_tmp_dir(&etmp, DB_USE_ENVIRON)) != 0)
				return (ret);
			tmp_free = 1;
			a = etmp.db_tmp_dir;
		} else
			a = dbenv->db_tmp_dir;
	}

	/*
	 * Each component needs its length plus one byte, for either the
	 * separator that follows it or the terminating nul.
	 */
done:	len =
	    (a == NULL ? 0 : strlen(a) + 1) +
	    (b == NULL ? 0 : strlen(b) + 1) +
	    (c == NULL ? 0 : strlen(c) + 1) +
	    (file == NULL ? 0 : strlen(file) + 1);

	/* With no components at all there must still be room for the nul. */
	if (len == 0)
		len = 1;

	if ((start = (char *)malloc(len)) == NULL) {
		__db_err(dbenv, "%s", strerror(ENOMEM));
		if (tmp_free)
			FREES(etmp.db_tmp_dir);
		return (ENOMEM);
	}

	slash = 0;
	p = start;
	DB_ADDSTR(a);
	DB_ADDSTR(b);
	/* c was counted in len above; it belongs between b and the file. */
	DB_ADDSTR(c);
	DB_ADDSTR(file);
	*p = '\0';

	/*
	 * If we're opening a data file, see if it exists.  If it does,
	 * return it, otherwise, try and find another one to open.
	 */
	if (data_entry != -1 && __db_exists(start, NULL) != 0) {
		FREES(start);
		a = b = c = NULL;
		goto retry;
	}

	/* Discard any space allocated to find the temp directory. */
	if (tmp_free)
		FREES(etmp.db_tmp_dir);

	/* Create the file if so requested. */
	if (tmp_create) {
		ret = __db_tmp_open(dbenv, start, fdp);
		FREES(start);
	} else {
		*namep = start;
		ret = 0;
	}
	return (ret);
}
/*
 * __db_home --
 *	Find the database home.
 *
 *	The DB_HOME environment variable overrides the db_home argument
 *	when flags contains DB_USE_ENVIRON, or (where getuid is available)
 *	DB_USE_ENVIRON_ROOT and the caller's uid is 0.  A DB_HOME that is
 *	set but empty is an error (EINVAL).  When a home is found a copy is
 *	stored in dbenv->db_home; otherwise dbenv is left untouched.
 *	Returns 0, EINVAL or ENOMEM.
 */
static int
__db_home(dbenv, db_home, flags)
	DB_ENV *dbenv;
	const char *db_home;
	int flags;
{
	const char *env, *home;
	int use_env;

	/* Use the environment if it's permitted and initialized. */
	use_env = LF_ISSET(DB_USE_ENVIRON) ? 1 : 0;
#ifdef HAVE_GETUID
	if (!use_env && LF_ISSET(DB_USE_ENVIRON_ROOT) && getuid() == 0)
		use_env = 1;
#endif

	home = db_home;
	if (use_env && (env = getenv("DB_HOME")) != NULL) {
		if (env[0] == '\0') {
			__db_err(dbenv,
			    "illegal DB_HOME environment variable");
			return (EINVAL);
		}
		home = env;
	}

	/* Nothing specified anywhere: there is no home to record. */
	if (home == NULL)
		return (0);

	dbenv->db_home = (char *)strdup(home);
	if (dbenv->db_home == NULL) {
		__db_err(dbenv, "%s", strerror(ENOMEM));
		return (ENOMEM);
	}
	return (0);
}
/*
 * __db_parse --
 *	Parse a single NAME VALUE pair.
 *
 *	s holds a name and a value separated by blanks and/or tabs.
 *	DB_DATA_DIR values are appended to the NULL-terminated
 *	dbenv->db_data_dir array; DB_LOG_DIR and DB_TMP_DIR replace any
 *	earlier value.  Any other name is ignored.  s itself is not
 *	modified.
 *
 *	Returns 0, EINVAL if the name or the value is missing, or ENOMEM.
 */
static int
__db_parse(dbenv, s)
	DB_ENV *dbenv;
	char *s;
{
	size_t ocnt;
	int ret;
	char *local_s, *name, *value, **np, **p, *tp;

	ret = 0;

	/*
	 * We need to strdup the argument in case the caller passed us
	 * static data.
	 */
	if ((local_s = (char *)strdup(s)) == NULL)
		return (ENOMEM);

	/* Skip the empty fields strsep returns for runs of separators. */
	tp = local_s;
	while ((name = strsep(&tp, " \t")) != NULL && *name == '\0');
	if (name == NULL)
		goto illegal;
	while ((value = strsep(&tp, " \t")) != NULL && *value == '\0');
	if (value == NULL) {
illegal:	ret = EINVAL;
		__db_err(dbenv, "illegal name-value pair: %s", s);
		goto err;
	}

#define	DATA_INIT_CNT	20			/* Start with 20 data slots. */
	if (!strcmp(name, "DB_DATA_DIR")) {
		if (dbenv->db_data_dir == NULL) {
			if ((dbenv->db_data_dir = (char **)calloc(DATA_INIT_CNT,
			    sizeof(char **))) == NULL)
				goto nomem;
			dbenv->data_cnt = DATA_INIT_CNT;
		} else if (dbenv->data_next == dbenv->data_cnt - 1) {
			/*
			 * Double the array.  Keep the old array (and count)
			 * if realloc fails, and clear the new half: the
			 * array is walked up to its first NULL entry, and
			 * realloc doesn't zero the memory it adds.
			 */
			ocnt = (size_t)dbenv->data_cnt;
			if ((np = (char **)realloc(dbenv->db_data_dir,
			    ocnt * 2 * sizeof(char *))) == NULL)
				goto nomem;
			memset(np + ocnt, 0, ocnt * sizeof(char *));
			dbenv->db_data_dir = np;
			dbenv->data_cnt *= 2;
		}
		p = &dbenv->db_data_dir[dbenv->data_next++];
	} else if (!strcmp(name, "DB_LOG_DIR")) {
		if (dbenv->db_log_dir != NULL)
			FREES(dbenv->db_log_dir);
		p = &dbenv->db_log_dir;
	} else if (!strcmp(name, "DB_TMP_DIR")) {
		if (dbenv->db_tmp_dir != NULL)
			FREES(dbenv->db_tmp_dir);
		p = &dbenv->db_tmp_dir;
	} else
		goto err;		/* Unknown names are ignored. */

	if ((*p = (char *)strdup(value)) == NULL) {
nomem:		ret = ENOMEM;
		__db_err(dbenv, "%s", strerror(ENOMEM));
	}

err:	FREES(local_s);
	return (ret);
}
#ifdef macintosh
#include <TFileSpec.h>
static char *sTempFolder;
#endif
/*
* __db_tmp_dir --
* Set the temporary directory path.
*
* A copy of the chosen directory name is stored in dbenv->db_tmp_dir.
* The name comes from the environment (TMPDIR, TEMP, TMP, TempFolder,
* in that order) when flags permits it, otherwise from the first entry
* of a built-in list for which __db_exists() returns 0.
*
* Returns 0, EINVAL if one of the environment variables is set but
* empty, or ENOMEM if the name can't be copied.
*/
static int
__db_tmp_dir(dbenv, flags)
DB_ENV *dbenv;
int flags;
{
static const char * list[] = { /* Ordered: see db_appinit(3). */
"/var/tmp",
"/usr/tmp",
"/temp", /* WIN32. */
"/tmp",
"C:/temp", /* WIN32. */
"C:/tmp", /* WIN32. */
NULL
};
const char **lp, *p;
/* Use the environment if it's permitted and initialized. */
p = NULL;
/*
* NOTE(review): the guard tests HAVE_GETEUID but the call is getuid();
* __db_home guards the same test with HAVE_GETUID -- confirm which
* macro config.h actually defines.
*/
#ifdef HAVE_GETEUID
if (LF_ISSET(DB_USE_ENVIRON) ||
(LF_ISSET(DB_USE_ENVIRON_ROOT) && getuid() == 0)) {
#else
if (LF_ISSET(DB_USE_ENVIRON)) {
#endif
/* The first variable that is set wins; a set-but-empty one is fatal. */
if ((p = getenv("TMPDIR")) != NULL && p[0] == '\0') {
__db_err(dbenv, "illegal TMPDIR environment variable");
return (EINVAL);
}
/* WIN32 */
if (p == NULL && (p = getenv("TEMP")) != NULL && p[0] == '\0') {
__db_err(dbenv, "illegal TEMP environment variable");
return (EINVAL);
}
/* WIN32 */
if (p == NULL && (p = getenv("TMP")) != NULL && p[0] == '\0') {
__db_err(dbenv, "illegal TMP environment variable");
return (EINVAL);
}
/* Macintosh */
if (p == NULL &&
(p = getenv("TempFolder")) != NULL && p[0] == '\0') {
__db_err(dbenv,
"illegal TempFolder environment variable");
return (EINVAL);
}
}
#ifdef macintosh
/* Get the path to the temporary folder. */
if (p == NULL) {
FSSpec spec;
if (!Special2FSSpec(kTemporaryFolderType,
kOnSystemDisk, 0, &spec)) {
p = FSp2FullPath(&spec);
/* NOTE(review): the malloc result is used without a NULL check. */
sTempFolder = malloc(strlen(p) + 1);
strcpy(sTempFolder, p);
p = sTempFolder;
}
}
#endif
/* Step through the list looking for a possibility. */
/*
* NOTE(review): p is assigned inside the test, so when no entry exists
* the loop ends with p still naming the last entry tried rather than
* NULL, and the p == NULL return below is not taken on that path.
*/
if (p == NULL)
for (lp = list; *lp != NULL; ++lp)
if (__db_exists(p = *lp, NULL) == 0)
break;
if (p == NULL)
return (0);
/* Store a private copy of the name. */
if ((dbenv->db_tmp_dir = (char *)strdup(p)) == NULL) {
__db_err(dbenv, "%s", strerror(ENOMEM));
return (ENOMEM);
}
return (0);
}
/*
 * __db_tmp_open --
 *	Create a temporary file.
 *
 *	dbenv: environment used for error reporting.
 *	dir:   directory the file is created in; it must exist and be a
 *	       directory.
 *	fdp:   handed to __db_fdopen() to receive the open descriptor.
 *
 *	The name is dir followed by a separator and six characters that
 *	start out as the low decimal digits of the process ID and are
 *	stepped through lower-case letters for as long as the open fails
 *	with EEXIST.
 *
 *	Returns 0 on success, ENAMETOOLONG if the name doesn't fit in
 *	MAXPATHLEN bytes, EINVAL if dir is not a directory or every name
 *	is taken, or the error from the existence check or the open.
 */
static int
__db_tmp_open(dbenv, dir, fdp)
	DB_ENV *dbenv;
	char *dir;
	int *fdp;
{
#ifdef HAVE_SIGFILLSET
	sigset_t set, oset;
#endif
	u_long pid;
	size_t len;
	int isdir, ret;
	char *trv, buf[MAXPATHLEN];

	/*
	 * Check the target directory; if you have six X's and it doesn't
	 * exist, this runs for a *very* long time.
	 */
	if ((ret = __db_exists(dir, &isdir)) != 0) {
		__db_err(dbenv, "%s: %s", dir, strerror(ret));
		return (ret);
	}
	if (!isdir) {
		__db_err(dbenv, "%s: %s", dir, strerror(EINVAL));
		return (EINVAL);
	}

	/* Build the path. */
#define	DB_TRAIL	"/XXXXXX"
	if ((len = strlen(dir)) + sizeof(DB_TRAIL) > sizeof(buf)) {
		/* buf hasn't been filled in yet: report the directory. */
		__db_err(dbenv,
		    "tmp_open: %s: %s", dir, strerror(ENAMETOOLONG));
		return (ENAMETOOLONG);
	}
	(void)strcpy(buf, dir);
	(void)strcpy(buf + len, DB_TRAIL);
	buf[len] = PATH_SEPARATOR[0];			/* WIN32 */

	/*
	 * Replace the X's with the process ID.  Pid should be a pid_t,
	 * but we use unsigned long for portability.
	 *
	 * The scan runs backward from the terminating nul and stops at the
	 * first non-'X' character, i.e. the separator stored above.
	 */
	for (pid = getpid(),
	    trv = buf + len + sizeof(DB_TRAIL) - 1; *--trv == 'X'; pid /= 10)
		*trv = (char)('0' + pid % 10);
	++trv;

	/*
	 * Try and open a file.  We block every signal we can get our hands
	 * on so that, if we're interrupted at the wrong time, the temporary
	 * file isn't left around -- of course, if we drop core in-between
	 * the calls we'll hang forever, but that's probably okay.  ;-}
	 */
#ifdef HAVE_SIGFILLSET
	(void)sigfillset(&set);
#endif
	for (;;) {
#ifdef HAVE_SIGFILLSET
		(void)sigprocmask(SIG_BLOCK, &set, &oset);
#endif
#define	DB_TEMPOPEN	DB_CREATE | DB_EXCL | DB_TEMPORARY
		if ((ret = __db_fdopen(buf,
		    DB_TEMPOPEN, DB_TEMPOPEN, S_IRUSR | S_IWUSR, fdp)) == 0) {
#ifdef HAVE_SIGFILLSET
			(void)sigprocmask(SIG_SETMASK, &oset, NULL);
#endif
			return (0);
		}
#ifdef HAVE_SIGFILLSET
		(void)sigprocmask(SIG_SETMASK, &oset, NULL);
#endif
		/*
		 * XXX:
		 * If we don't get an EEXIST error, then there's something
		 * seriously wrong.  Unfortunately, if the implementation
		 * doesn't return EEXIST for O_CREAT and O_EXCL regardless
		 * of other possible errors, we've lost.
		 */
		if (ret != EEXIST) {
			__db_err(dbenv,
			    "tmp_open: %s: %s", buf, strerror(ret));
			return (ret);
		}

		/*
		 * Tricky little algorithm for backward compatibility.
		 * Assumes the ASCII ordering of lower-case characters.
		 *
		 * The six characters are stepped like an odometer, left-most
		 * position first: a digit becomes 'a', a letter becomes the
		 * next letter, and a 'z' wraps to 'a' and carries into the
		 * position to its right.  Running off the end means every
		 * name has been tried.
		 */
		for (;;) {
			if (*trv == '\0')
				return (EINVAL);
			if (*trv == 'z')
				*trv++ = 'a';
			else {
				/* <ctype.h> wants an unsigned char value. */
				if (isdigit((unsigned char)*trv))
					*trv = 'a';
				else
					++*trv;
				break;
			}
		}
	}
	/* NOTREACHED */
}

143
db2/common/db_apprec.c Normal file
View File

@ -0,0 +1,143 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1997\n\
Sleepycat Software Inc. All rights reserved.\n";
static const char sccsid[] = "@(#)db_apprec.c 10.15 (Sleepycat) 7/27/97";
#endif
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <time.h>
#include <string.h>
#endif
#include "db_int.h"
#include "shqueue.h"
#include "db_page.h"
#include "db_dispatch.h"
#include "db_am.h"
#include "log.h"
#include "txn.h"
#include "common_ext.h"
/*
* __db_apprec --
* Perform recovery.
*
* Runs three passes over the log of dbenv->lg_info, handing every
* record to __db_dispatch(): forward with TXN_OPENFILES, backward with
* TXN_BACKWARD_ROLL, then forward again with TXN_FORWARD_ROLL.  It then
* closes the files opened through the log, updates the transaction
* region and takes a checkpoint.
*
* flags is tested for DB_RECOVER_FATAL only.  Returns 0 on success,
* the log_get() error if the starting record can't be read, or the
* negative __db_dispatch() value that aborted a pass.
*
* NOTE(review): no release of the list created by __db_txnlist_init()
* is visible in this function -- confirm who owns txninfo.
*
* PUBLIC: int __db_apprec __P((DB_ENV *, int));
*/
int
__db_apprec(dbenv, flags)
DB_ENV *dbenv;
int flags;
{
DBT data;
DB_LOG *lp;
DB_LSN ckp_lsn, first_lsn, lsn, tmp_lsn;
time_t now;
int first_flag, ret, tret;
void *txninfo;
ZERO_LSN(ckp_lsn);
/* Initialize the transaction list. */
if ((ret = __db_txnlist_init(&txninfo)) != 0)
return (ret);
/*
* Read forward through the log opening the appropriate files
* so that we can call recovery routines. In general, we start
* at the last checkpoint prior to the last checkpointed LSN.
* For catastrophic recovery, we begin at the first LSN that
* appears in any log file (log figures this out for us when
* we pass it the DB_FIRST flag).
*/
lp = dbenv->lg_info;
if (LF_ISSET(DB_RECOVER_FATAL))
first_flag = DB_FIRST;
else
/* Fall back to the start of the log if no checkpoint is found. */
first_flag = __log_findckp(lp, &lsn) != 0 ? DB_FIRST : DB_SET;
memset(&data, 0, sizeof(data));
if ((ret = log_get(lp, &lsn, &data, first_flag)) != 0) {
__db_err(dbenv, "Failure: unable to get log record");
if (first_flag == DB_SET)
__db_err(dbenv, "Retrieving LSN %lu %lu",
(u_long)lsn.file, (u_long)lsn.offset);
else
__db_err(dbenv, "Retrieving first LSN");
goto err;
}
/* Remember where the passes start; the backward pass stops here. */
first_lsn = lsn;
/*
* Pass 1: forward from the starting record until log_get() returns
* non-zero.  That final log_get() value is not examined: it is
* overwritten by the first log_get() of the next pass.
*/
for (; ret == 0;
ret = log_get(dbenv->lg_info, &lsn, &data, DB_NEXT))
if ((tret = __db_dispatch(lp,
&data, &lsn, TXN_OPENFILES, txninfo)) < 0) {
ret = tret;
goto msgerr;
}
/*
* Pass 2: backward from the last record while the LSN is greater than
* first_lsn.  The LSN of the first record for which __db_dispatch()
* returns a positive value is saved in ckp_lsn; tmp_lsn holds the LSN
* as it was before the dispatch call, which is given &lsn.
*/
for (ret = log_get(lp, &lsn, &data, DB_LAST);
ret == 0 && log_compare(&lsn, &first_lsn) > 0;
ret = log_get(lp,&lsn, &data, DB_PREV)) {
tmp_lsn = lsn;
tret =
__db_dispatch(lp, &data, &lsn, TXN_BACKWARD_ROLL, txninfo);
if (IS_ZERO_LSN(ckp_lsn) && tret > 0)
ckp_lsn = tmp_lsn;
if (tret < 0) {
ret = tret;
goto msgerr;
}
}
/*
* Pass 3: forward from wherever the backward pass stopped until
* log_get() returns non-zero; that value is not examined either.
*/
for (ret = log_get(lp, &lsn, &data, DB_NEXT);
ret == 0; ret = log_get(lp, &lsn, &data, DB_NEXT))
if ((tret = __db_dispatch(lp,
&data, &lsn, TXN_FORWARD_ROLL, txninfo)) < 0) {
ret = tret;
goto msgerr;
}
/* Now close all the db files that are open. */
__log_close_files(lp);
/*
* Now set the maximum transaction id, set the last checkpoint lsn,
* and the current time. Then take a checkpoint.
*/
(void)time(&now);
dbenv->tx_info->region->last_txnid = ((__db_txnhead *)txninfo)->maxid;
dbenv->tx_info->region->last_ckp = ckp_lsn;
dbenv->tx_info->region->time_ckp = (u_int32_t) now;
/* The result of the checkpoint is not checked. */
txn_checkpoint(dbenv->tx_info, 0, 0);
if (dbenv->db_verbose) {
__db_err(lp->dbenv, "Recovery complete at %s", ctime(&now));
__db_err(lp->dbenv, "%s %lu %s [%lu][%lu]",
"Maximum transaction id",
(u_long)dbenv->tx_info->region->last_txnid,
"Recovery checkpoint",
(u_long)dbenv->tx_info->region->last_ckp.file,
(u_long)dbenv->tx_info->region->last_ckp.offset);
}
return (0);
/* A dispatch failure reports the LSN of the record being processed. */
msgerr: __db_err(dbenv, "Recovery function for LSN %lu %lu failed",
(u_long)lsn.file, (u_long)lsn.offset);
err: return (ret);
}

56
db2/common/db_byteorder.c Normal file
View File

@ -0,0 +1,56 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_byteorder.c 10.3 (Sleepycat) 6/21/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#endif
#include "db_int.h"
#include "common_ext.h"
/*
 * __db_byteorder --
 *	Return if we need to do byte swapping, checking for illegal
 *	values.
 *
 *	lorder is 0, 1234 (little-endian) or 4321 (big-endian).  The result
 *	is 0 when lorder is 0 or matches the order this library was built
 *	for (WORDS_BIGENDIAN), DB_SWAPBYTES when it is the other order, and
 *	EINVAL, after an error message, for any other value.
 *
 * PUBLIC: int __db_byteorder __P((DB_ENV *, int));
 */
int
__db_byteorder(dbenv, lorder)
	DB_ENV *dbenv;
	int lorder;
{
	int host_order;

#if defined(WORDS_BIGENDIAN)
	host_order = 4321;
#else
	host_order = 1234;
#endif

	/* An lorder of 0 never asks for swapping. */
	if (lorder == 0)
		return (0);

	if (lorder != 1234 && lorder != 4321) {
		__db_err(dbenv,
	    "illegal byte order, only big and little-endian supported");
		return (EINVAL);
	}

	return (lorder == host_order ? 0 : DB_SWAPBYTES);
}

548
db2/common/db_err.c Normal file
View File

@ -0,0 +1,548 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_err.c 10.16 (Sleepycat) 8/24/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#ifdef __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif
#endif
#include "db_int.h"
#include "common_ext.h"
static int __db_rdonly __P((const DB_ENV *, const char *));
/*
 * __db_err --
 *	Standard DB error routine.
 *
 *	Formats the printf-style message and delivers it to the error
 *	callback (dbenv->db_errcall) and/or the error stream
 *	(dbenv->db_errfile) of the environment, whichever are set.  The
 *	callback receives the prefix and the formatted text, truncated to
 *	the size of the local buffer; on the stream the message is preceded
 *	by "prefix: " when a prefix is set and followed by a newline.
 *	Nothing happens when dbenv is NULL.
 *
 * PUBLIC: #ifdef __STDC__
 * PUBLIC: void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...));
 * PUBLIC: #else
 * PUBLIC: void __db_err();
 * PUBLIC: #endif
 */
void
#ifdef __STDC__
__db_err(const DB_ENV *dbenv, const char *fmt, ...)
#else
__db_err(dbenv, fmt, va_alist)
	const DB_ENV *dbenv;
	const char *fmt;
	va_dcl
#endif
{
	va_list ap;
	char errbuf[2048];	/* XXX: END OF THE STACK DON'T TRUST SPRINTF. */

	if (dbenv == NULL)
		return;

	/*
	 * The argument list is indeterminate once a v*printf routine has
	 * consumed it, so it is started and ended separately for each
	 * destination instead of being walked twice.
	 */
	if (dbenv->db_errcall != NULL) {
#ifdef __STDC__
		va_start(ap, fmt);
#else
		va_start(ap);
#endif
		(void)vsnprintf(errbuf, sizeof(errbuf), fmt, ap);
		va_end(ap);
		dbenv->db_errcall(dbenv->db_errpfx, errbuf);
	}
	if (dbenv->db_errfile != NULL) {
		if (dbenv->db_errpfx != NULL)
			(void)fprintf(dbenv->db_errfile, "%s: ",
			    dbenv->db_errpfx);
#ifdef __STDC__
		va_start(ap, fmt);
#else
		va_start(ap);
#endif
		(void)vfprintf(dbenv->db_errfile, fmt, ap);
		va_end(ap);
		(void)fprintf(dbenv->db_errfile, "\n");
		(void)fflush(dbenv->db_errfile);
	}
}
/*
* XXX
* Provide ANSI C prototypes for the panic functions. Some compilers (e.g.,
* MS VC 4.2) get upset if they aren't here, even though the K&R declaration
* appears before the assignment in the __db_panic() call.
*/
static int __db_ecursor __P((DB *, DB_TXN *, DBC **));
static int __db_edel __P((DB *, DB_TXN *, DBT *, int));
static int __db_efd __P((DB *, int *));
static int __db_egp __P((DB *, DB_TXN *, DBT *, DBT *, int));
static int __db_estat __P((DB *, void *, void *(*)(size_t), int));
static int __db_esync __P((DB *, int));
/*
* __db_ecursor --
* After-panic cursor routine.
*/
static int
__db_ecursor(a, b, c)
	DB *a;
	DB_TXN *b;
	DBC **c;
{
	/* The arguments are ignored; reference them to quiet the compiler. */
	(void)a;
	(void)b;
	(void)c;

	/* Every call is refused with EPERM. */
	return (EPERM);
}
/*
* __db_edel --
* After-panic delete routine.
*/
static int
__db_edel(a, b, c, d)
	DB *a;
	DB_TXN *b;
	DBT *c;
	int d;
{
	/* The arguments are ignored; reference them to quiet the compiler. */
	(void)a;
	(void)b;
	(void)c;
	(void)d;

	/* Every call is refused with EPERM. */
	return (EPERM);
}
/*
* __db_efd --
* After-panic fd routine.
*/
static int
__db_efd(a, b)
	DB *a;
	int *b;
{
	/* The arguments are ignored; reference them to quiet the compiler. */
	(void)a;
	(void)b;

	/* Every call is refused with EPERM. */
	return (EPERM);
}
/*
* __db_egp --
* After-panic get/put routine.
*/
static int
__db_egp(a, b, c, d, e)
	DB *a;
	DB_TXN *b;
	DBT *c, *d;
	int e;
{
	/* The arguments are ignored; reference them to quiet the compiler. */
	(void)a;
	(void)b;
	(void)c;
	(void)d;
	(void)e;

	/* Every call is refused with EPERM. */
	return (EPERM);
}
/*
* __db_estat --
* After-panic stat routine.
*/
static int
__db_estat(a, b, c, d)
	DB *a;
	void *b;
	void *(*c) __P((size_t));
	int d;
{
	/* The arguments are ignored; reference them to quiet the compiler. */
	(void)a;
	(void)b;
	(void)c;
	(void)d;

	/* Every call is refused with EPERM. */
	return (EPERM);
}
/*
* __db_esync --
* After-panic sync routine.
*/
static int
__db_esync(a, b)
	DB *a;
	int b;
{
	/* The arguments are ignored; reference them to quiet the compiler. */
	(void)a;
	(void)b;

	/* Every call is refused with EPERM. */
	return (EPERM);
}
/*
* __db_panic --
* Lock out the tree due to unrecoverable error.
*
* PUBLIC: int __db_panic __P((DB *));
*/
int
__db_panic(dbp)
	DB *dbp;
{
	/*
	 * XXX
	 * We should shut down all of the process's cursors, too.
	 *
	 * We should call mpool and have it shut down the file, so we get
	 * other processes sharing this file as well.
	 */

	/*
	 * Replace each method pointer on the handle with the matching
	 * after-panic stub, so later calls through the handle get EPERM.
	 */

	/* Record access: get and put share one stub. */
	dbp->get = __db_egp;
	dbp->put = __db_egp;
	dbp->del = __db_edel;
	dbp->cursor = __db_ecursor;

	/* Handle-level operations. */
	dbp->fd = __db_efd;
	dbp->stat = __db_estat;
	dbp->sync = __db_esync;

	return (EPERM);
}
/*
 * Check for invalid flags: if any bit of flags lies outside ok_flags, return
 * from the *calling* function with the result of __db_ferr(dbenv, name, 0).
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, e.g. when used as the body of an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#undef	DB_CHECK_FLAGS
#define	DB_CHECK_FLAGS(dbenv, name, flags, ok_flags)			\
	do {								\
		if ((flags) & ~(ok_flags))				\
			return (__db_ferr(dbenv, name, 0));		\
	} while (0)
/*
 * Check for invalid flag combinations: if flags has bits from both flag1 and
 * flag2 set, return from the *calling* function with the result of
 * __db_ferr(dbenv, name, 1).
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, e.g. when used as the body of an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#undef	DB_CHECK_FCOMBO
#define	DB_CHECK_FCOMBO(dbenv, name, flags, flag1, flag2)		\
	do {								\
		if ((flags) & (flag1) && (flags) & (flag2))		\
			return (__db_ferr(dbenv, name, 1));		\
	} while (0)
/*
* __db_fchk --
* General flags checking routine.
*
* PUBLIC: int __db_fchk __P((DB_ENV *, char *, int, int));
*/
int
__db_fchk(dbenv, name, flags, ok_flags)
	DB_ENV *dbenv;
	const char *name;
	int flags, ok_flags;
{
	/*
	 * Any bit of flags outside ok_flags is reported through __db_ferr
	 * (whose result is returned); otherwise the answer is 0.
	 */
	if ((flags & ~ok_flags) != 0)
		return (__db_ferr(dbenv, name, 0));
	return (0);
}
/*
* __db_fcchk --
* General combination flags checking routine.
*
* PUBLIC: int __db_fcchk __P((DB_ENV *, char *, int, int, int));
*/
int
__db_fcchk(dbenv, name, flags, flag1, flag2)
	DB_ENV *dbenv;
	const char *name;
	int flags, flag1, flag2;
{
	/*
	 * Having bits from both flag1 and flag2 set at once is reported
	 * through __db_ferr as a bad combination; otherwise return 0.
	 */
	if ((flags & flag1) != 0 && (flags & flag2) != 0)
		return (__db_ferr(dbenv, name, 1));
	return (0);
}
/*
* __db_cdelchk --
* Common cursor delete argument checking routine.
*
* PUBLIC: int __db_cdelchk __P((const DB *, int, int, int));
*/
int
__db_cdelchk(dbp, flags, isrdonly, isvalid)
	const DB *dbp;
	int flags, isrdonly, isvalid;
{
	/* A cursor delete changes the tree, so refuse it when read-only. */
	if (isrdonly)
		return (__db_rdonly(dbp->dbenv, "c_del"));

	/* dbc->c_del() accepts no flags at all. */
	if (flags != 0)
		return (__db_ferr(dbp->dbenv, "c_del", 0));

	/* The cursor must be initialized: EINVAL if it is not, else 0. */
	if (!isvalid)
		return (EINVAL);
	return (0);
}
/*
* __db_cgetchk --
* Common cursor get argument checking routine.
*
* PUBLIC: int __db_cgetchk __P((const DB *, DBT *, DBT *, int, int));
*/
int
__db_cgetchk(dbp, key, data, flags, isvalid)
	const DB *dbp;
	DBT *key, *data;
	int flags, isvalid;
{
	int need_key_check;

	/*
	 * Validate the dbc->c_get() operation code and note whether the
	 * key DBT takes part in the threaded-flags check further down.
	 */
	switch (flags) {
	case DB_SET:
		need_key_check = 0;
		break;
	case DB_GET_RECNO:
	case DB_SET_RECNO:
		/* Only legal when DB_BT_RECNUM is set on the handle. */
		if (!F_ISSET(dbp, DB_BT_RECNUM))
			return (__db_ferr(dbp->dbenv, "c_get", 0));
		/* FALLTHROUGH */
	case DB_CURRENT:
	case DB_FIRST:
	case DB_LAST:
	case DB_NEXT:
	case DB_PREV:
	case DB_SET_RANGE:
		need_key_check = 1;
		break;
	default:
		return (__db_ferr(dbp->dbenv, "c_get", 0));
	}

	/* Reject unknown bits in the key and data DBT flags. */
	if (key->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
		return (__db_ferr(dbp->dbenv, "key", 0));
	if (data->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
		return (__db_ferr(dbp->dbenv, "data", 0));

	/*
	 * On a DB_AM_THREAD handle the data DBT (and the key DBT, when it is
	 * checked) must carry DB_DBT_USERMEM or DB_DBT_MALLOC.
	 */
	if (F_ISSET(dbp, DB_AM_THREAD)) {
		if (!F_ISSET(data, DB_DBT_USERMEM | DB_DBT_MALLOC))
			return (__db_ferr(dbp->dbenv, "threaded data", 1));
		if (need_key_check &&
		    !F_ISSET(key, DB_DBT_USERMEM | DB_DBT_MALLOC))
			return (__db_ferr(dbp->dbenv, "threaded key", 1));
	}

	/*
	 * Only DB_CURRENT needs an initialized cursor: EINVAL when it is
	 * requested on an invalid cursor, 0 in every other case.
	 */
	if (flags == DB_CURRENT && !isvalid)
		return (EINVAL);
	return (0);
}
/*
* __db_cputchk --
* Common cursor put argument checking routine.
*
* PUBLIC: int __db_cputchk __P((const DB *,
* PUBLIC: const DBT *, DBT *, int, int, int));
*/
int
__db_cputchk(dbp, key, data, flags, isrdonly, isvalid)
	const DB *dbp;
	const DBT *key;
	DBT *data;
	int flags, isrdonly, isvalid;
{
	int check_key;

	/*
	 * Validate the arguments of a cursor put.  Returns 0 when they are
	 * acceptable, the result of __db_rdonly() for a read-only tree, the
	 * result of __db_ferr() for bad flags, or EINVAL when the operation
	 * needs an initialized cursor and isvalid is zero.
	 */

	/* Check for changes to a read-only tree. */
	if (isrdonly)
		return (__db_rdonly(dbp->dbenv, "c_put"));

	/* Check for invalid dbc->c_put() function flags. */
	check_key = 0;
	switch (flags) {
	case DB_AFTER:
	case DB_BEFORE:
		/*
		 * Allowed for DB_RECNO only with DB_RE_RENUMBER set, and for
		 * other types only with DB_AM_DUP set.
		 */
		if (dbp->type == DB_RECNO && !F_ISSET(dbp, DB_RE_RENUMBER))
			goto err;
		if (dbp->type != DB_RECNO && !F_ISSET(dbp, DB_AM_DUP))
			goto err;
		break;
	case DB_CURRENT:
		break;
	case DB_KEYFIRST:
	case DB_KEYLAST:
		/* Not accepted for DB_RECNO; the key DBT is checked below. */
		if (dbp->type == DB_RECNO)
			goto err;
		check_key = 1;
		break;
	default:
err:		return (__db_ferr(dbp->dbenv, "c_put", 0));
	}

	/* Check for invalid key/data flags. */
	if (check_key) {
		if (key->flags &
		    ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
			return (__db_ferr(dbp->dbenv, "key", 0));
	}
	if (data->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
		return (__db_ferr(dbp->dbenv, "data", 0));

	/*
	 * The cursor must be initialized for anything other than DB_KEYFIRST
	 * and DB_KEYLAST: return EINVAL for an invalid cursor, otherwise 0.
	 *
	 * (The previous expression had the flag test inverted: it accepted
	 * an uninitialized cursor for DB_AFTER/DB_BEFORE/DB_CURRENT and
	 * rejected it for DB_KEYFIRST/DB_KEYLAST.)
	 */
	if (isvalid || flags == DB_KEYFIRST || flags == DB_KEYLAST)
		return (0);
	return (EINVAL);
}
/*
* __db_delchk --
* Common delete argument checking routine.
*
* PUBLIC: int __db_delchk __P((const DB *, int, int));
*/
int
__db_delchk(dbp, flags, isrdonly)
	const DB *dbp;
	int flags, isrdonly;
{
	/* A delete changes the tree, so refuse it when read-only. */
	if (isrdonly)
		return (__db_rdonly(dbp->dbenv, "delete"));

	/* db->del() accepts no flags; anything else passes. */
	return (flags != 0 ? __db_ferr(dbp->dbenv, "delete", 0) : 0);
}
/*
* __db_getchk --
* Common get argument checking routine.
*
* PUBLIC: int __db_getchk __P((const DB *, const DBT *, DBT *, int));
*/
int
__db_getchk(dbp, key, data, flags)
	const DB *dbp;
	const DBT *key;
	DBT *data;
	int flags;
{
	/*
	 * db->get() takes no flags, except DB_SET_RECNO when DB_BT_RECNUM
	 * is set on the handle.
	 */
	if (flags & ~(F_ISSET(dbp, DB_BT_RECNUM) ? DB_SET_RECNO : 0))
		return (__db_ferr(dbp->dbenv, "get", 0));

	/* The key DBT may carry no flags at all. */
	if (key->flags != 0)
		return (__db_ferr(dbp->dbenv, "key", 0));

	/* The data DBT: known bits only, and not MALLOC with USERMEM. */
	if (data->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
		return (__db_ferr(dbp->dbenv, "data", 0));
	if ((data->flags & DB_DBT_MALLOC) && (data->flags & DB_DBT_USERMEM))
		return (__db_ferr(dbp->dbenv, "data", 1));

	/* A DB_AM_THREAD handle requires one of MALLOC/USERMEM on data. */
	if (!F_ISSET(dbp, DB_AM_THREAD))
		return (0);
	if (F_ISSET(data, DB_DBT_MALLOC | DB_DBT_USERMEM))
		return (0);
	return (__db_ferr(dbp->dbenv, "threaded data", 1));
}
/*
* __db_putchk --
* Common put argument checking routine.
*
* PUBLIC: int __db_putchk __P((const DB *, DBT *, const DBT *, int, int, int));
*/
int
__db_putchk(dbp, key, data, flags, isrdonly, isdup)
	const DB *dbp;
	DBT *key;
	const DBT *data;
	int flags, isrdonly, isdup;
{
	/* A put changes the tree, so refuse it when read-only. */
	if (isrdonly)
		return (__db_rdonly(dbp->dbenv, "put"));

	/*
	 * db->put() accepts DB_NOOVERWRITE, plus DB_APPEND when the handle
	 * type is DB_RECNO.
	 */
	if (flags &
	    ~(DB_NOOVERWRITE | (dbp->type == DB_RECNO ? DB_APPEND : 0)))
		return (__db_ferr(dbp->dbenv, "put", 0));

	/* The key DBT may carry no flags at all. */
	if (key->flags != 0)
		return (__db_ferr(dbp->dbenv, "key", 0));

	/* The data DBT: known bits only, and not MALLOC with USERMEM. */
	if (data->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
		return (__db_ferr(dbp->dbenv, "data", 0));
	if ((data->flags & DB_DBT_MALLOC) && (data->flags & DB_DBT_USERMEM))
		return (__db_ferr(dbp->dbenv, "data", 1));

	/* Partial puts are fine unless duplicates are in play. */
	if (!isdup || !F_ISSET(data, DB_DBT_PARTIAL))
		return (0);

	__db_err(dbp->dbenv,
"a partial put in the presence of duplicates requires a cursor operation");
	return (EINVAL);
}
/*
* __db_statchk --
* Common stat argument checking routine.
*
* PUBLIC: int __db_statchk __P((const DB *, int));
*/
int
__db_statchk(dbp, flags)
	const DB *dbp;
	int flags;
{
	/* DB_RECORDCOUNT is the only flag db->stat() accepts. */
	if (flags & ~DB_RECORDCOUNT)
		return (__db_ferr(dbp->dbenv, "stat", 0));

	/*
	 * DB_RECORDCOUNT on a DB_BTREE handle is rejected unless
	 * DB_BT_RECNUM is set on that handle.
	 */
	if (!LF_ISSET(DB_RECORDCOUNT))
		return (0);
	if (dbp->type != DB_BTREE || F_ISSET(dbp, DB_BT_RECNUM))
		return (0);
	return (__db_ferr(dbp->dbenv, "stat", 0));
}
/*
* __db_syncchk --
* Common sync argument checking routine.
*
* PUBLIC: int __db_syncchk __P((const DB *, int));
*/
int
__db_syncchk(dbp, flags)
	const DB *dbp;
	int flags;
{
	/* db->sync() accepts no flags; anything else passes. */
	return (flags != 0 ? __db_ferr(dbp->dbenv, "sync", 0) : 0);
}
/*
* __db_ferr --
* Common flag errors.
*
* PUBLIC: int __db_ferr __P((const DB_ENV *, char *, int));
*/
int
__db_ferr(dbenv, name, combo)
	const DB_ENV *dbenv;
	const char *name;
	int combo;
{
	const char *kind;

	/* A non-zero combo reports a bad combination instead of a bad flag. */
	if (combo)
		kind = "combination ";
	else
		kind = "";

	__db_err(dbenv, "illegal flag %sspecified to %s", kind, name);

	/* Always EINVAL, so callers can return this function's result. */
	return (EINVAL);
}
/*
* __db_rdonly --
* Common readonly message.
*/
static int
__db_rdonly(dbenv, name)
	const DB_ENV *dbenv;
	const char *name;
{
	/* Report the attempted operation by name. */
	__db_err(dbenv, "%s: attempt to modify a read-only tree", name);

	/* Always EACCES, so callers can return this function's result. */
	return (EACCES);
}

68
db2/common/db_log2.c Normal file
View File

@ -0,0 +1,68 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_log2.c 10.3 (Sleepycat) 6/21/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#endif
#include "db_int.h"
#include "common_ext.h"
/*
* PUBLIC: u_int32_t __db_log2 __P((u_int32_t));
*/
u_int32_t
__db_log2(num)
	u_int32_t num;
{
	u_int32_t i, limit;

	/*
	 * Return the smallest i such that (1 << i) >= num, i.e. the base-2
	 * logarithm of num rounded up (0 for num of 0 or 1).
	 *
	 * The count is capped at 32: for num above 2^31 no 32-bit power of
	 * two is large enough, and shifting limit once more would wrap it to
	 * 0 and loop forever.  Such values yield 32.
	 */
	for (i = 0, limit = 1; limit < num && i < 32; limit <<= 1, i++)
		;
	return (i);
}

565
db2/common/db_region.c Normal file
View File

@ -0,0 +1,565 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
* The President and Fellows of Harvard University. All rights reserved.
*
* This code is derived from software contributed to Harvard by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_region.c 10.12 (Sleepycat) 7/26/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "common_ext.h"
static int __db_rmap __P((DB_ENV *, int, size_t, void *));
/*
* __db_rcreate --
*
* Common interface for creating a shared region. Handles synchronization
* across multiple processes.
*
* The dbenv contains the environment for this process, including naming
* information. The path argument represents the parameters passed to
* the open routines and may be either a file or a directory. If it is
* a directory, it must exist. If it is a file, then the file parameter
* must be NULL, otherwise, file is the name to be created inside the
* directory path.
*
* On success the function returns 0 and stores a pointer to the shared region
* that has been mapped into memory through retp; on error it returns a
* non-zero error number.
*
* PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME,
* PUBLIC: const char *, const char *, int, size_t, int *, void *));
*/
int
__db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
DB_ENV *dbenv;
APPNAME appname;
const char *path, *file;
int mode, *fdp;
size_t size;
void *retp;
{
RLAYOUT *rp;
int fd, ret;
char *name;
/*
* Summary: build the region's file name, create the file exclusively
* (unless __db_appname already returned an open descriptor), grow it to
* size bytes, map it, and initialize the RLAYOUT header. On success the
* descriptor is stored in *fdp, the mapping is stored through retp and
* 0 is returned. On failure an error number is returned.
*
* Note: the region mutex locked below is not unlocked in this function.
*/
fd = -1;
rp = NULL;
/*
* Get the filename -- note, if it's a temporary file, it will
* be created by the underlying temporary file creation code,
* so we have to check the file descriptor to be sure it's an
* error.
*/
if ((ret = __db_appname(dbenv, appname, path, file, &fd, &name)) != 0)
return (ret);
/*
* Now open the file. We need to make sure that multiple processes
* that attempt to create the region at the same time are properly
* ordered, so we open it O_EXCL and O_CREAT so two simultaneous
* attempts to create the region will return failure in one of the
* attempts.
*/
if (fd == -1 && (ret = __db_fdopen(name,
DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) {
/* EEXIST is returned to the caller without an error message. */
if (ret != EEXIST)
__db_err(dbenv,
"region create: %s: %s", name, strerror(ret));
goto err;
}
/*
* The descriptor is published to the caller immediately. If a later
* step fails, the err path closes fd but does not reset *fdp.
*/
*fdp = fd;
/* Grow the region to the correct size. */
if ((ret = __db_rgrow(dbenv, fd, size)) != 0)
goto err;
/* Map the region in. */
if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0)
goto err;
/*
* Initialize the common information.
*
* !!!
* We have to order the region creates so that two processes don't try
* to simultaneously create the region and so that processes that are
* joining the region never see inconsistent data. We'd like to play
* file permissions games, but we can't because WNT filesystems won't
* open a file mode 0.
*
* So, the process that's creating the region always acquires the lock
* before setting the version number. Any process joining always
* checks the version number before attempting to acquire the lock.
*
* We have to check the version number first, because if the version
* number has not been written, it's possible that the mutex has not
* been initialized in which case an attempt to get it could lead to
* random behavior. If the version number isn't there (the file size
* is too small) or it's 0, we know that the region is being created.
*/
(void)__db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock));
(void)__db_mutex_lock(&rp->lock,
fd, dbenv == NULL ? NULL : dbenv->db_yield);
rp->refcnt = 1;
rp->size = size;
rp->flags = 0;
/* The version fields are written last, after the lock is held. */
db_version(&rp->majver, &rp->minver, &rp->patch);
if (name != NULL)
FREES(name);
*(void **)retp = rp;
return (0);
/*
* Error exit: if a descriptor exists, undo the mapping (if any), remove
* the file and close the descriptor; then release the name.
*/
err: if (fd != -1) {
if (rp != NULL)
(void)__db_munmap(rp, rp->size);
(void)__db_unlink(name);
(void)__db_close(fd);
}
if (name != NULL)
FREES(name);
return (ret);
}
/*
* __db_ropen --
* Construct the name of a file, open it and map it in.
*
* PUBLIC: int __db_ropen __P((DB_ENV *,
* PUBLIC: APPNAME, const char *, const char *, int, int *, void *));
*/
int
__db_ropen(dbenv, appname, path, file, flags, fdp, retp)
DB_ENV *dbenv;
APPNAME appname;
const char *path, *file;
int flags, *fdp;
void *retp;
{
RLAYOUT *rp;
off_t size1, size2;
int fd, ret;
char *name;
/*
* Summary: build the region's file name, open and map the existing
* file, verify that it has been initialized, and increment its
* reference count under the region lock. On success the descriptor is
* stored in *fdp, the mapping is stored through retp and 0 is returned.
* EAGAIN is returned when the file is too short, its major version is
* still 0, its size changed while we were looking, or it is flagged
* DB_R_DELETED; other failures return the error from the failing call.
*
* When DB_MUTEXDEBUG is set in flags the region lock is neither taken
* nor released.
*/
fd = -1;
rp = NULL;
/* Get the filename. */
if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
return (ret);
/* Open the file. */
if ((ret = __db_fdopen(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) {
__db_err(dbenv, "region open: %s: %s", name, strerror(ret));
goto err2;
}
/*
* The descriptor is published to the caller immediately. If a later
* step fails, the error path closes fd but does not reset *fdp.
*/
*fdp = fd;
/*
* Map the file in. We have to do things in a strange order so that
* we don't get into a situation where the file was just created and
* isn't yet initialized. See the comment in __db_rcreate() above.
*
* XXX
* We'd like to test to see if the file is too big to mmap. Since we
* don't know what size or type off_t's or size_t's are, or the largest
* unsigned integral type is, or what random insanity the local C
* compiler will perpetrate, doing the comparison in a portable way is
* flatly impossible. Hope that mmap fails if the file is too large.
*
*/
if ((ret = __db_stat(dbenv, name, fd, &size1, NULL)) != 0)
goto err2;
/* Check to make sure the first block has been written. */
if ((size_t) size1 < sizeof(RLAYOUT)) {
ret = EAGAIN;
goto err2;
}
/* Map in whatever is there. */
if ((ret = __db_rmap(dbenv, fd, size1, &rp)) != 0)
goto err2;
/*
* Check to make sure the region has been initialized. We can't just
* grab the lock because the lock may not have been initialized yet.
*/
if (rp->majver == 0) {
ret = EAGAIN;
goto err2;
}
/* Get the region lock. */
if (!LF_ISSET(DB_MUTEXDEBUG))
(void)__db_mutex_lock(&rp->lock,
fd, dbenv == NULL ? NULL : dbenv->db_yield);
/*
* The file may have been half-written if we were descheduled between
* getting the size of the file and checking the major version. Check
* to make sure we got the entire file.
*/
if ((ret = __db_stat(dbenv, name, fd, &size2, NULL)) != 0)
goto err1;
if (size1 != size2) {
ret = EAGAIN;
goto err1;
}
/* The file may have just been deleted. */
if (F_ISSET(rp, DB_R_DELETED)) {
ret = EAGAIN;
goto err1;
}
/* Increment the reference count. */
++rp->refcnt;
/* Release the lock. */
if (!LF_ISSET(DB_MUTEXDEBUG))
(void)__db_mutex_unlock(&rp->lock, fd);
FREES(name);
*(void **)retp = rp;
return (0);
/* err1: failures after the lock was taken; release it first. */
err1: if (!LF_ISSET(DB_MUTEXDEBUG))
(void)__db_mutex_unlock(&rp->lock, fd);
/* err2: undo the mapping and the open, then release the name. */
err2: if (rp != NULL)
(void)__db_munmap(rp, rp->size);
if (fd != -1)
(void)__db_close(fd);
FREES(name);
return (ret);
}
/*
* __db_rclose --
* Close a shared memory region.
*
* PUBLIC: int __db_rclose __P((DB_ENV *, int, void *));
*/
int
__db_rclose(dbenv, fd, ptr)
	DB_ENV *dbenv;
	int fd;
	void *ptr;
{
	RLAYOUT *region;
	const char *what;
	int err, step;

	region = ptr;
	what = NULL;

	/*
	 * Drop our reference under the region lock.  If the lock can't be
	 * obtained nothing further is attempted.
	 */
	err = __db_mutex_lock(&region->lock,
	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
	if (err != 0) {
		what = "lock get";
		goto report;
	}
	--region->refcnt;

	/*
	 * Unlock, unmap and close.  All three steps are always attempted;
	 * only the first failure is remembered and reported.
	 */
	step = __db_mutex_unlock(&region->lock, fd);
	if (step != 0 && what == NULL) {
		err = step;
		what = "lock release";
	}

	step = __db_munmap(ptr, region->size);
	if (step != 0 && what == NULL) {
		err = step;
		what = "munmap";
	}

	step = __db_close(fd);
	if (step != 0 && what == NULL) {
		err = step;
		what = "close";
	}

	if (what == NULL)
		return (0);

report:	__db_err(dbenv, "region detach: %s: %s", what, strerror(err));
	return (err);
}
/*
* __db_runlink --
* Remove a shared memory region.
*
* PUBLIC: int __db_runlink __P((DB_ENV *,
* PUBLIC: APPNAME, const char *, const char *, int));
*/
int
__db_runlink(dbenv, appname, path, file, force)
	DB_ENV *dbenv;
	APPNAME appname;
	const char *path, *file;
	int force;
{
	RLAYOUT *rp;
	int cnt, fd, ret, t_ret;
	char *name;

	/*
	 * Remove the file backing a shared region.  Returns 0 on success
	 * (including when the file does not exist) and an error number
	 * otherwise: ENOENT if the region is already flagged as deleted,
	 * EBUSY if its reference count shows another user, or the error
	 * from the failing call.  With force set, the file is simply
	 * unlinked without opening the region.
	 */
	rp = NULL;

	/* Get the filename. */
	if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
		return (ret);

	/* If the file doesn't exist, we're done. */
	if (__db_exists(name, NULL)) {
		/* Release the constructed name; it used to be leaked here. */
		FREES(name);
		return (0);		/* XXX: ENOENT? */
	}

	/*
	 * If we're called with a force flag, try and unlink the file.  This
	 * may not succeed if the file is currently open, but there's nothing
	 * we can do about that.  There is a race condition between the check
	 * for existence above and the actual unlink.  If someone else snuck
	 * in and removed it before we do the remove, then we might get an
	 * ENOENT error.  If we get the ENOENT, we treat it as success, just
	 * as we do above.
	 */
	if (force) {
		if ((ret = __db_unlink(name)) != 0 && ret != ENOENT)
			goto err1;
		FREES(name);
		return (0);
	}

	/* Open and lock the region. */
	if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
		goto err1;
	(void)__db_mutex_lock(&rp->lock,
	    fd, dbenv == NULL ? NULL : dbenv->db_yield);

	/* If the region is currently being deleted, fail. */
	if (F_ISSET(rp, DB_R_DELETED)) {
		ret = ENOENT;		/* XXX: ENOENT? */
		goto err2;
	}

	/* If the region is currently in use by someone else, fail. */
	if (rp->refcnt > 1) {
		ret = EBUSY;
		goto err2;
	}

	/* Set the delete flag. */
	F_SET(rp, DB_R_DELETED);

	/*
	 * Release the lock and close the region.  A close failure must be
	 * carried in ret: previously it jumped to the error exit with ret
	 * still 0 and so was reported to the caller as success.
	 */
	(void)__db_mutex_unlock(&rp->lock, fd);
	if ((ret = __db_rclose(dbenv, fd, rp)) != 0)
		goto err1;

	/*
	 * Unlink the region.  There's a race here -- other threads or
	 * processes might be opening the region while we're trying to
	 * remove it.  They'll fail, because we've set the DELETED flag,
	 * but they could still stop us from succeeding in the unlink.
	 */
	for (cnt = 5; cnt > 0; --cnt) {
		if ((ret = __db_unlink(name)) == 0)
			break;
		(void)__db_sleep(0, 250000);
	}
	if (ret == 0) {
		FREES(name);
		return (0);
	}

	/*
	 * Not a clue.  Try to clear the DB_R_DELETED flag.
	 *
	 * The result of the reopen goes into t_ret so that ret keeps the
	 * unlink error; otherwise a successful reopen would reset ret to 0
	 * and the failed unlink would be returned as success.
	 */
	if ((t_ret =
	    __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
		goto err1;
	(void)__db_mutex_lock(&rp->lock,
	    fd, dbenv == NULL ? NULL : dbenv->db_yield);
	F_CLR(rp, DB_R_DELETED);
	/* FALLTHROUGH */

err2:	(void)__db_mutex_unlock(&rp->lock, fd);
	(void)__db_rclose(dbenv, fd, rp);
err1:	__db_err(dbenv, "region unlink: %s: %s", name, strerror(ret));
	FREES(name);
	return (ret);
}
/*
* DB creates all regions on 4K boundaries so that we don't make the
* underlying VM unhappy.
*
* __db_rgrow() uses this value both as the size of the zero-filled buffer
* it writes and as the unit to which it rounds the growth increment.
*/
#define __DB_VMPAGESIZE (4 * 1024)
/*
* __db_rgrow --
* Extend a region by a specified amount.
*
* PUBLIC: int __db_rgrow __P((DB_ENV *, int, size_t));
*/
int
__db_rgrow(dbenv, fd, incr)
	DB_ENV *dbenv;
	int fd;
	size_t incr;
{
#ifdef MMAP_INIT_NEEDED
	size_t i;
#endif
	ssize_t nw;
	int ret;
	char buf[__DB_VMPAGESIZE];

	/*
	 * Extend the file open on fd by incr bytes, rounded up to a multiple
	 * of __DB_VMPAGESIZE, writing zero-filled pages.  Returns 0 on
	 * success; on failure reports the error through __db_err and returns
	 * it (EIO for a short write).
	 */

	/* Seek to the end of the region. */
	if ((ret = __db_lseek(fd, 0, 0, 0, SEEK_END)) != 0)
		goto err;

	/* Write nuls to the new bytes. */
	memset(buf, 0, sizeof(buf));

	/*
	 * Historically, some systems required that all of the bytes of the
	 * region be written before you could mmap it and access it randomly.
	 */
#ifdef MMAP_INIT_NEEDED
	/* Extend the region by writing each new page. */
	for (i = 0; i < incr; i += __DB_VMPAGESIZE) {
		if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
			goto err;
		if ((size_t)nw != sizeof(buf))
			goto eio;
	}
#else
	/*
	 * Growing by nothing is a no-op, as it is in the page-by-page
	 * variant above.  Without this check the rounding below leaves incr
	 * at 0 and "incr - __DB_VMPAGESIZE" wraps around in size_t, producing
	 * a bogus seek offset.
	 */
	if (incr == 0)
		return (0);

	/*
	 * Extend the region by writing the last page.
	 *
	 * Round off the increment to the next page boundary.
	 */
	incr += __DB_VMPAGESIZE - 1;
	incr -= incr % __DB_VMPAGESIZE;

	/* Write the last page, not the page after the last. */
	if ((ret =
	    __db_lseek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0)
		goto err;
	if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
		goto err;
	if ((size_t)nw != sizeof(buf))
		goto eio;
#endif
	return (0);

eio:	ret = EIO;
err:	__db_err(dbenv, "region grow: %s", strerror(ret));
	return (ret);
}
/*
* __db_rremap --
* Unmap the old region and map in a new region of a new size. If
* either call fails, returns a non-zero error number; otherwise returns
* 0 and stores the address of the new region through retp.
*
* PUBLIC: int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *));
*/
int
__db_rremap(dbenv, ptr, oldsize, newsize, fd, retp)
	DB_ENV *dbenv;
	void *ptr, *retp;
	size_t oldsize, newsize;
	int fd;
{
	int ret;

	/* Discard the old mapping; on success hand off to __db_rmap. */
	ret = __db_munmap(ptr, oldsize);
	if (ret == 0)
		return (__db_rmap(dbenv, fd, newsize, retp));

	/* The unmap failed: report it and map nothing. */
	__db_err(dbenv, "region remap: munmap: %s", strerror(ret));
	return (ret);
}
/*
* __db_rmap --
* Attach to a shared memory region.
*/
static int
__db_rmap(dbenv, fd, size, retp)
	DB_ENV *dbenv;
	int fd;
	size_t size;
	void *retp;
{
	RLAYOUT *region;
	int ret;

	/* Map size bytes of fd; a failure is reported and returned. */
	ret = __db_mmap(fd, size, 0, 0, &region);
	if (ret != 0) {
		__db_err(dbenv, "region map: mmap %s", strerror(ret));
		return (ret);
	}

	/* Raise the size recorded in the header if it is below size. */
	if (region->size < size)
		region->size = size;

	/* Hand the mapping back through retp. */
	*(void **)retp = region;
	return (0);
}

290
db2/common/db_salloc.c Normal file
View File

@ -0,0 +1,290 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_salloc.c 10.6 (Sleepycat) 7/5/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#endif
#include "db_int.h"
#include "shqueue.h"
#include "common_ext.h"
/*
* Implement shared memory region allocation, using simple first-fit algorithm.
* The model is that we take a "chunk" of shared memory store and begin carving
* it up into areas, similarly to how malloc works. We do coalescing on free.
*
* The "len" field in the __data struct contains the length of the free region
* (less the size_t bytes that hold the length). We use the address provided
* by the caller to find this length, which allows us to free a chunk without
* requiring that the caller pass in the length of the chunk they're freeing.
*/
/* Head of the free list; the allocator places it at the start of the area. */
SH_LIST_HEAD(__head);
/*
* Header of a chunk on the free list. The allocator treats the memory that
* starts at "links" as the chunk's usable space.
*/
struct __data {
size_t len; /* Chunk length in bytes, not counting this field. */
SH_LIST_ENTRY links; /* Free-list linkage. */
};
/*
* __db_shalloc_init --
* Initialize the area as one large chunk.
*
* PUBLIC: void __db_shalloc_init __P((void *, size_t));
*/
void
__db_shalloc_init(area, size)
	void *area;
	size_t size;
{
	struct __head *head;
	struct __data *chunk;

	/* The list head sits at the very start of the area. */
	head = area;
	SH_LIST_INIT(head);

	/*
	 * Everything after the head becomes a single free chunk.  Its
	 * length excludes the head and the chunk's own length field.
	 */
	chunk = (struct __data *)(head + 1);
	chunk->len = size - (sizeof(struct __head) + sizeof(chunk->len));
	SH_LIST_INSERT_HEAD(head, chunk, links, __data);
}
/*
* __db_shalloc --
* Allocate some space from the shared region.
*
* PUBLIC: int __db_shalloc __P((void *, size_t, size_t, void *));
*/
int
__db_shalloc(p, len, align, retp)
void *p, *retp;
size_t len, align;
{
struct __data *elp;
size_t *sp;
void *rp;
/*
* Summary: first-fit allocation of len bytes from the free list whose
* head is at p. The memory is carved from the END of the first chunk
* that can hold it. On success the address is stored through retp and 0
* is returned; if no chunk fits, ENOMEM is returned.
*/
/*
* We never allocate less than the size of a struct __data, align
* to less than a size_t boundary, or align to something that's not
* a multiple of a size_t.
*/
if (len < sizeof(struct __data))
len = sizeof(struct __data);
align = align <= sizeof(size_t) ?
sizeof(size_t) : ALIGN(align, sizeof(size_t));
/* Walk the list, looking for a slot. */
for (elp = SH_LIST_FIRST((struct __head *)p, __data);
elp != NULL;
elp = SH_LIST_NEXT(elp, links, __data)) {
/*
* Calculate the value of the returned pointer if we were to
* use this chunk.
* + Find the end of the chunk.
* + Subtract the memory the user wants.
* + Find the closest previous correctly-aligned address.
*
* NOTE(review): the mask ~(align - 1) only rounds down correctly
* when align is a power of two -- confirm callers guarantee this.
*/
rp = (u_int8_t *)elp + sizeof(size_t) + elp->len;
rp = (u_int8_t *)rp - len;
rp = (u_int8_t *)((ALIGNTYPE)rp & ~(align - 1));
/*
* Rp may now point before elp->links, in which case the chunk
* was too small, and we have to try again.
*/
if ((u_int8_t *)rp < (u_int8_t *)&elp->links)
continue;
*(void **)retp = rp;
/*
* If there are at least 32 bytes of additional memory, divide
* the chunk into two chunks.
*/
if ((u_int8_t *)rp >= (u_int8_t *)&elp->links + 32) {
/*
* The size_t just below rp becomes the new allocation's
* length field (the bytes from rp to the old end of the
* chunk). The free chunk keeps what is left, less that
* length field.
*/
sp = rp;
*--sp = elp->len -
((u_int8_t *)rp - (u_int8_t *)&elp->links);
elp->len -= *sp + sizeof(size_t);
return (0);
}
/*
* Otherwise, we return the entire chunk, wasting some amount
* of space to keep the list compact. However, because the
* address we're returning to the user may not be the address
* of the start of the region for alignment reasons, set the
* size_t length fields back to the "real" length field to a
* flag value, so that we can find the real length during free.
*/
/* ILLEGAL_SIZE is also used by the free and sizeof routines below. */
#define ILLEGAL_SIZE 1
SH_LIST_REMOVE(elp, links, __data);
/* Flag every size_t slot from just below rp down to elp->links. */
for (sp = rp; (u_int8_t *)--sp >= (u_int8_t *)&elp->links;)
*sp = ILLEGAL_SIZE;
return (0);
}
/* Nothing found large enough; need to figure out how to grow region. */
return (ENOMEM);
}
/*
* __db_shalloc_free --
* Free a shared memory allocation.
*
* PUBLIC: void __db_shalloc_free __P((void *, void *));
*/
void
__db_shalloc_free(regionp, ptr)
void *regionp, *ptr;
{
struct __data *elp, *lastp, *newp;
struct __head *hp;
size_t free_size, *sp;
int merged;
/*
* Summary: return the allocation at ptr to the free list whose head is
* at regionp, merging it with the free chunk that follows it and/or the
* one that precedes it when they are adjacent in memory.
*/
/*
* Step back over flagged length fields to find the beginning of
* the object and its real size.
*/
for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp);
ptr = sp;
/* The length field sits one size_t below the start of the object. */
newp = (struct __data *)((u_int8_t *)ptr - sizeof(size_t));
free_size = newp->len;
/*
* Walk the list, looking for where this entry goes.
*
* We keep the free list sorted by address so that coalescing is
* trivial.
*
* XXX
* Probably worth profiling this to see how expensive it is.
*/
hp = (struct __head *)regionp;
for (elp = SH_LIST_FIRST(hp, __data), lastp = NULL;
elp != NULL && (void *)elp < (void *)ptr;
lastp = elp, elp = SH_LIST_NEXT(elp, links, __data));
/*
* Elp is either NULL (we reached the end of the list), or the slot
* after the one that's being returned. Lastp is either NULL (we're
* returning the first element of the list) or the element before the
* one being returned.
*
* Check for coalescing with the next element.
*/
merged = 0;
if ((u_int8_t *)ptr + free_size == (u_int8_t *)elp) {
/* Absorb the next chunk and take its place on the list. */
newp->len += elp->len + sizeof(size_t);
SH_LIST_REMOVE(elp, links, __data);
if (lastp != NULL)
SH_LIST_INSERT_AFTER(lastp, newp, links, __data);
else
SH_LIST_INSERT_HEAD(hp, newp, links, __data);
merged = 1;
}
/* Check for coalescing with the previous element. */
if (lastp != NULL && (u_int8_t *)lastp +
lastp->len + sizeof(size_t) == (u_int8_t *)newp) {
lastp->len += newp->len + sizeof(size_t);
/*
* If we have already put the new element into the list take
* it back off again because it's just been merged with the
* previous element.
*/
if (merged)
SH_LIST_REMOVE(newp, links, __data);
merged = 1;
}
/*
* No neighbour was adjacent: link the chunk in at its sorted position.
* (The else below pairs with the inner "if (lastp == NULL)".)
*/
if (!merged)
if (lastp == NULL)
SH_LIST_INSERT_HEAD(hp, newp, links, __data);
else
SH_LIST_INSERT_AFTER(lastp, newp, links, __data);
}
/*
* __db_shalloc_count --
* Return the amount of memory on the free list.
*
* PUBLIC: size_t __db_shalloc_count __P((void *));
*/
size_t
__db_shalloc_count(addr)
	void *addr;
{
	struct __data *chunk;
	size_t total;

	/* Add up the len field of every chunk on the free list. */
	total = 0;
	chunk = SH_LIST_FIRST((struct __head *)addr, __data);
	while (chunk != NULL) {
		total += chunk->len;
		chunk = SH_LIST_NEXT(chunk, links, __data);
	}
	return (total);
}
/*
 * __db_shsizeof --
 *	Return the length recorded for a shalloc'd piece of memory.
 *
 * PUBLIC: size_t __db_shsizeof __P((void *));
 */
size_t
__db_shsizeof(ptr)
	void *ptr;
{
	struct __data *chunk;
	size_t *lenp;

	/*
	 * Words holding ILLEGAL_SIZE may sit between the chunk header and
	 * the pointer handed to the caller; back up past them until the
	 * word just before lenp is the real length field.
	 */
	lenp = (size_t *)ptr;
	while (lenp[-1] == ILLEGAL_SIZE)
		--lenp;

	/* The chunk header begins one size_t before the first data word. */
	chunk = (struct __data *)((u_int8_t *)lenp - sizeof(size_t));
	return (chunk->len);
}
#ifdef DEBUG
/*
* __db_shalloc_dump --
*
* PUBLIC: void __db_shalloc_dump __P((FILE *, void *));
*/
void
__db_shalloc_dump(fp, addr)
FILE *fp;
void *addr;
{
struct __data *elp;
if (fp == NULL)
fp = stderr;
for (elp = SH_LIST_FIRST((struct __head *)addr, __data);
elp != NULL;
elp = SH_LIST_NEXT(elp, links, __data))
fprintf(fp, "%#lx: %lu\t", (u_long)elp, (u_long)elp->len);
fprintf(fp, "\n");
}
#endif

90
db2/common/db_shash.c Normal file
View File

@ -0,0 +1,90 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_shash.c 10.3 (Sleepycat) 6/21/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#endif
#include "db_int.h"
#include "shqueue.h"
#include "common_ext.h"
/*
 * Candidate table sizes: each power of two is paired with a nearby prime.
 * The all-zero entry terminates the table.
 */
static const struct {
	int power;
	int prime;
} list[] = {
	{  64,   67},
	{ 128,  131},
	{ 256,  257},
	{ 512,  521},
	{1024, 1031},
	{2048, 2053},
	{4096, 4099},
	{8192, 8191},
	{0, 0}
};
/*
 * __db_tablesize --
 *	Choose a size for the hash table.
 *
 *	Returns the prime paired with the first power of two that is at
 *	least n_buckets.  Requests below 64 are treated as 64; requests
 *	beyond the largest power in the table get the last prime in it.
 *
 * PUBLIC: int __db_tablesize __P((int));
 */
int
__db_tablesize(n_buckets)
	int n_buckets;
{
	int slot;

	/*
	 * A prime bucket count close to the expected number of elements
	 * keeps collisions low (ideally one element per bucket).  64 is
	 * the minimum table size.
	 *
	 * Ref: Sedgewick, Algorithms in C, "Hash Functions"
	 */
	if (n_buckets < 64)
		n_buckets = 64;

	/* Advance to the first power that is large enough, or the sentinel. */
	slot = 0;
	while (list[slot].power != 0 && list[slot].power < n_buckets)
		++slot;

	/* Ran off the end: fall back to the last real entry. */
	if (list[slot].power == 0)
		--slot;

	return (list[slot].prime);
}
/*
* __db_hashinit --
* Initialize a hash table that resides in shared memory.
*
* PUBLIC: void __db_hashinit __P((void *, int));
*/
void
__db_hashinit(begin, nelements)
void *begin;
int nelements;
{
int i;
SH_TAILQ_HEAD(hash_head) *headp;
headp = (struct hash_head *)begin;
for (i = 0; i < nelements; i++, headp++)
SH_TAILQ_INIT(headp);
}

10
db2/compat.h Normal file
View File

@ -0,0 +1,10 @@
/* Compatibility gunk for the db library. */
#include <sys/types.h>

/*
 * EFTYPE is a BSD error code that not every system's <errno.h> provides.
 * Fall back to EINVAL only where it is missing, so that a native
 * definition is never redefined.
 */
#ifndef EFTYPE
#define EFTYPE EINVAL
#endif

/*
 * Emulate Solaris llseek().
 *
 * NOTE(review): llseek is declared here as returning int although it
 * takes a loff_t offset -- confirm against the definition that the
 * return type is intended.
 */
typedef loff_t offset_t;
extern int llseek (int fd, loff_t offset, int whence);

142
db2/config.h Normal file
View File

@ -0,0 +1,142 @@
/* config.h. Generated automatically by configure. */
/* config.h.in. Generated automatically from configure.in by autoheader. */
/* ...but edited by hand to be used in GNU libc. */
#include <endian.h>
#include <sys/stat.h> /* To get _STATBUF_ST_BLKSIZE. */
/* Define to empty if the keyword does not work. */
/* #undef const */
/* Define if your struct stat has st_blksize. */
#ifdef _STATBUF_ST_BLKSIZE
# define HAVE_ST_BLKSIZE 1
#endif
/* Define to `int' if <sys/types.h> doesn't define. */
/* #undef mode_t */
/* Define to `long' if <sys/types.h> doesn't define. */
/* #undef off_t */
/* Define to `int' if <sys/types.h> doesn't define. */
/* #undef pid_t */
/* Define to `unsigned' if <sys/types.h> doesn't define. */
/* #undef size_t */
/* Define if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define if your processor stores words with the most significant
byte first (like Motorola and SPARC, unlike Intel and VAX).
Compare against __BIG_ENDIAN, which <endian.h> always defines, rather
than BIG_ENDIAN, which is only visible under BSD-style feature macros
and would otherwise evaluate as 0 in this #if. */
#if __BYTE_ORDER == __BIG_ENDIAN
# define WORDS_BIGENDIAN 1
#endif
/* Define to `int' if <sys/types.h> doesn't define. */
/* #undef ssize_t */
/* Define if you want a debugging version. */
/* #undef DEBUG */
/* Define if you have sigfillset (and sigprocmask). */
#define HAVE_SIGFILLSET 1
/*
 * The seek and spinlock options below are all left undefined in this
 * hand-edited file.  According to the accompanying ChangeLog, the sysdeps
 * Makefiles add them through CPPFLAGS when db2 is among the subdirs
 * (-DHAVE_LLSEEK for Linux; spinlock macros for i386, m68020 and sparc).
 */
/* Define if seeking to 64-bit file offsets requires the _llseek() call. */
/* #undef HAVE_LLSEEK */
/* Define if seeking to 64-bit file offsets requires the _lseeki64() call. */
/* #undef HAVE_LSEEKI */
/* Define if you have spinlocks. */
/* #undef HAVE_SPINLOCKS */
/* Define if you want to use mc68020/gcc assembly spinlocks. */
/* #undef HAVE_ASSEM_MC68020_GCC */
/* Define if you want to use sparc/gcc assembly spinlocks. */
/* #undef HAVE_ASSEM_SPARC_GCC */
/* Define if you want to use uts4/cc assembly spinlocks. */
/* #undef HAVE_ASSEM_UTS4_CC */
/* Define if you want to use x86/gcc assembly spinlocks. */
/* #undef HAVE_ASSEM_X86_GCC */
/* Define if you have the AIX _check_lock spinlocks. */
/* #undef HAVE_FUNC_AIX */
/* Define if you have the OSF1 or HPPA msemaphore spinlocks. */
/* #undef HAVE_FUNC_MSEM */
/* Define if you have the SGI abilock_t spinlocks. */
/* #undef HAVE_FUNC_SGI */
/* Define if you have the Solaris mutex_t spinlocks. */
/* #undef HAVE_FUNC_SOLARIS */
/* Define if your sprintf returns a pointer, not a length. */
/* #undef SPRINTF_RET_CHARPNT */
/* Define if you have the getcwd function. */
#define HAVE_GETCWD 1
/* Define if you have the getopt function. */
#define HAVE_GETOPT 1
/* Define if you have the getuid function. */
#define HAVE_GETUID 1
/* Define if you have the memcmp function. */
#define HAVE_MEMCMP 1
/* Define if you have the memcpy function. */
#define HAVE_MEMCPY 1
/* Define if you have the memmove function. */
#define HAVE_MEMMOVE 1
/* Define if you have the mmap function. */
#define HAVE_MMAP 1
/* Define if you have the raise function. */
#define HAVE_RAISE 1
/* Define if you have the select function. */
#define HAVE_SELECT 1
/* Define if you have the snprintf function. */
#define HAVE_SNPRINTF 1
/* Define if you have the strdup function. */
#define HAVE_STRDUP 1
/* Define if you have the strerror function. */
#define HAVE_STRERROR 1
/* Define if you have the strsep function. */
#define HAVE_STRSEP 1
/* Define if you have the vsnprintf function. */
#define HAVE_VSNPRINTF 1
/* Define if you have the <dirent.h> header file. */
#define HAVE_DIRENT_H 1
/* Define if you have the <ndir.h> header file. */
/* #undef HAVE_NDIR_H */
/* Define if you have the <sys/dir.h> header file. */
/* #undef HAVE_SYS_DIR_H */
/* Define if you have the <sys/ndir.h> header file. */
/* #undef HAVE_SYS_NDIR_H */
/* Define if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H 1
/* Define if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H 1
#include_next <config.h>

796
db2/db.h Normal file
View File

@ -0,0 +1,796 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)db.h.src 10.67 (Sleepycat) 8/25/97
*/
#ifndef _DB_H_
#define _DB_H_
#ifndef __NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <stdio.h>
#endif
/*
* XXX
* MacOS: ensure that Metrowerks C makes enumeration types int sized.
*/
#ifdef __MWERKS__
#pragma enumsalwaysint on
#endif
/*
* XXX
* Handle function prototypes and the keyword "const". This steps on name
* space that DB doesn't control, but all of the other solutions are worse.
*/
#undef __P
#if defined(__STDC__) || defined(__cplusplus)
#define __P(protos) protos /* ANSI C prototypes */
#else
#define const
#define __P(protos) () /* K&R C preprocessor */
#endif
/*
* !!!
* DB needs basic information about specifically sized types. If they're
* not provided by the system, typedef them here.
*
* We protect them against multiple inclusion using __BIT_TYPES_DEFINED__,
* as does BIND and Kerberos, since we don't know for sure what #include
* files the user is using.
*
* !!!
* We also provide the standard u_int, u_long etc., if they're not provided
* by the system. This isn't completely necessary, but the example programs
* need them.
*/
#ifndef __BIT_TYPES_DEFINED__
#define __BIT_TYPES_DEFINED__
#endif
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 3
#define DB_VERSION_PATCH 4
#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.4: (8/20/97)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
#define DB_MAX_PAGES 0xffffffff /* >= # of pages in a file */
typedef u_int32_t db_recno_t; /* Record number type. */
typedef size_t DB_LOCK; /* Object returned by lock manager. */
#define DB_MAX_RECORDS 0xffffffff /* >= # of records in a tree */
#define DB_FILE_ID_LEN 20 /* DB file ID length. */
/* Forward structure declarations, so applications get type checking. */
struct __db; typedef struct __db DB;
#ifdef DB_DBM_HSEARCH
typedef struct __db DBM;
#endif
struct __db_bt_stat; typedef struct __db_bt_stat DB_BTREE_STAT;
struct __db_dbt; typedef struct __db_dbt DBT;
struct __db_env; typedef struct __db_env DB_ENV;
struct __db_info; typedef struct __db_info DB_INFO;
struct __db_lockregion; typedef struct __db_lockregion DB_LOCKREGION;
struct __db_lockreq; typedef struct __db_lockreq DB_LOCKREQ;
struct __db_locktab; typedef struct __db_locktab DB_LOCKTAB;
struct __db_log; typedef struct __db_log DB_LOG;
struct __db_lsn; typedef struct __db_lsn DB_LSN;
struct __db_mpool; typedef struct __db_mpool DB_MPOOL;
struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
struct __db_mpool_stat; typedef struct __db_mpool_stat DB_MPOOL_STAT;
struct __db_mpoolfile; typedef struct __db_mpoolfile DB_MPOOLFILE;
struct __db_txn; typedef struct __db_txn DB_TXN;
struct __db_txn_active; typedef struct __db_txn_active DB_TXN_ACTIVE;
struct __db_txn_stat; typedef struct __db_txn_stat DB_TXN_STAT;
struct __db_txnmgr; typedef struct __db_txnmgr DB_TXNMGR;
struct __db_txnregion; typedef struct __db_txnregion DB_TXNREGION;
struct __dbc; typedef struct __dbc DBC;
/*
 * Key/data structure -- a Data-Base Thang.
 *
 * Describes one key or data item: a pointer to the bytes and their length,
 * plus the user-buffer length, partial-record length/offset, and the
 * DB_DBT_* flag bits defined inline below.
 */
struct __db_dbt {
void *data; /* key/data */
u_int32_t size; /* key/data length */
u_int32_t ulen; /* RO: length of user buffer. */
u_int32_t dlen; /* RO: get/put record length. */
u_int32_t doff; /* RO: get/put record offset. */
/* Values for the flags field; each is a distinct bit. */
#define DB_DBT_INTERNAL 0x01 /* Perform any mallocs using regular
malloc, not the user's malloc. */
#define DB_DBT_MALLOC 0x02 /* Return in allocated memory. */
#define DB_DBT_PARTIAL 0x04 /* Partial put/get. */
#define DB_DBT_USERMEM 0x08 /* Return in user's memory. */
u_int32_t flags;
};
/*
* Database configuration and initialization.
*/
/*
* Flags understood by both db_open(3) and db_appinit(3).
*/
#define DB_CREATE 0x00001 /* O_CREAT: create file as necessary. */
#define DB_NOMMAP 0x00002 /* Don't mmap underlying file. */
#define DB_THREAD 0x00004 /* Free-thread DB package handles. */
/*
* Flags understood by db_appinit(3).
*
* DB_APP_INIT and DB_MUTEXDEBUG are internal only, and not documented.
*/
/* 0x00007 COMMON MASK. */
#define DB_APP_INIT 0x00008 /* Appinit called, paths initialized. */
#define DB_INIT_LOCK 0x00010 /* Initialize locking. */
#define DB_INIT_LOG 0x00020 /* Initialize logging. */
#define DB_INIT_MPOOL 0x00040 /* Initialize mpool. */
#define DB_INIT_TXN 0x00080 /* Initialize transactions. */
#define DB_MPOOL_PRIVATE 0x00100 /* Mpool: private memory pool. */
#define DB_MUTEXDEBUG 0x00200 /* Do not get/set mutexes in regions. */
#define DB_RECOVER 0x00400 /* Run normal recovery. */
#define DB_RECOVER_FATAL 0x00800 /* Run catastrophic recovery. */
#define DB_TXN_NOSYNC 0x01000 /* Do not sync log on commit. */
#define DB_USE_ENVIRON 0x02000 /* Use the environment. */
#define DB_USE_ENVIRON_ROOT 0x04000 /* Use the environment if root. */
/* CURRENTLY UNUSED LOCK FLAGS. */
#define DB_TXN_LOCK_2PL 0x00000 /* Two-phase locking. */
#define DB_TXN_LOCK_OPTIMISTIC 0x00000 /* Optimistic locking. */
#define DB_TXN_LOCK_MASK 0x00000 /* Lock flags mask. */
/* CURRENTLY UNUSED LOG FLAGS. */
#define DB_TXN_LOG_REDO 0x00000 /* Redo-only logging. */
#define DB_TXN_LOG_UNDO 0x00000 /* Undo-only logging. */
#define DB_TXN_LOG_UNDOREDO 0x00000 /* Undo/redo write-ahead logging. */
#define DB_TXN_LOG_MASK 0x00000 /* Log flags mask. */
/*
* Flags understood by db_open(3).
*
* DB_EXCL and DB_TEMPORARY are internal only, and not documented.
* DB_SEQUENTIAL is currently internal, but likely to be exported some day.
*/
/* 0x00007 COMMON MASK. */
/* 0x07fff ALREADY USED. */
#define DB_EXCL 0x08000 /* O_EXCL: exclusive open. */
#define DB_RDONLY 0x10000 /* O_RDONLY: read-only. */
#define DB_SEQUENTIAL 0x20000 /* Indicate sequential access. */
#define DB_TEMPORARY 0x40000 /* Remove on last close. */
#define DB_TRUNCATE 0x80000 /* O_TRUNC: replace existing DB. */
/*
* Deadlock detector modes; used in the DB_ENV structure to configure the
* locking subsystem.
*/
#define DB_LOCK_NORUN 0x0
#define DB_LOCK_DEFAULT 0x1
#define DB_LOCK_OLDEST 0x2
#define DB_LOCK_RANDOM 0x3
#define DB_LOCK_YOUNGEST 0x4
/*
 * DB_ENV: the application environment.
 *
 * Groups the error-reporting settings, the user path configuration, and
 * the handle and tunables for each subsystem (locking, logging, memory
 * pool, transactions).  Each *_info member holds the handle returned by
 * the corresponding *_open() call, as noted on the field.
 */
struct __db_env {
int db_lorder; /* Byte order. */
/* Error message callback. */
void (*db_errcall) __P((const char *, char *));
FILE *db_errfile; /* Error message file stream. */
const char *db_errpfx; /* Error message prefix. */
int db_verbose; /* Generate debugging messages. */
/* User paths. */
char *db_home; /* Database home. */
char *db_log_dir; /* Database log file directory. */
char *db_tmp_dir; /* Database tmp file directory. */
char **db_data_dir; /* Database data file directories. */
int data_cnt; /* Database data file slots. */
int data_next; /* Next Database data file slot. */
/* Locking. */
DB_LOCKTAB *lk_info; /* Return from lock_open(). */
u_int8_t *lk_conflicts; /* Two dimensional conflict matrix. */
int lk_modes; /* Number of lock modes in table. */
unsigned int lk_max; /* Maximum number of locks. */
/* NOTE(review): presumably one of the DB_LOCK_* detector modes -- confirm. */
u_int32_t lk_detect; /* Deadlock detect on every conflict. */
int (*db_yield) __P((void)); /* Yield function for threads. */
/* Logging. */
DB_LOG *lg_info; /* Return from log_open(). */
u_int32_t lg_max; /* Maximum file size. */
/* Memory pool. */
DB_MPOOL *mp_info; /* Return from memp_open(). */
size_t mp_mmapsize; /* Maximum file size for mmap. */
size_t mp_size; /* Bytes in the mpool cache. */
/* Transactions. */
DB_TXNMGR *tx_info; /* Return from txn_open(). */
unsigned int tx_max; /* Maximum number of transactions. */
int (*tx_recover) /* Dispatch function for recovery. */
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
u_int32_t flags; /* Flags. */
};
/*******************************************************
* Access methods.
*******************************************************/
/*
 * Access method selector.  Numbering starts at 1, so no valid type has
 * the value 0.
 */
typedef enum {
DB_BTREE=1, /* B+tree. */
DB_HASH, /* Extended Linear Hashing. */
DB_RECNO, /* Fixed and variable-length records. */
DB_UNKNOWN /* Figure it out on open. */
} DBTYPE;
#define DB_BTREEVERSION 6 /* Current btree version. */
#define DB_BTREEOLDVER 6 /* Oldest btree version supported. */
#define DB_BTREEMAGIC 0x053162
#define DB_HASHVERSION 5 /* Current hash version. */
#define DB_HASHOLDVER 4 /* Oldest hash version supported. */
#define DB_HASHMAGIC 0x061561
#define DB_LOGVERSION 2 /* Current log version. */
#define DB_LOGOLDVER 2 /* Oldest log version supported. */
#define DB_LOGMAGIC 0x040988
/*
 * DB_INFO: per-database configuration.
 *
 * Holds settings common to all access methods (byte order, cache and page
 * size, allocator) followed by one group of fields for each of the btree,
 * hash and recno access methods, and a flags word built from the DB_*
 * bits defined inline below.
 */
struct __db_info {
int db_lorder; /* Byte order. */
size_t db_cachesize; /* Underlying cache size. */
size_t db_pagesize; /* Underlying page size. */
/* Local heap allocation. */
void *(*db_malloc) __P((size_t));
/* Btree access method. */
int bt_maxkey; /* Maximum keys per page. */
int bt_minkey; /* Minimum keys per page. */
int (*bt_compare) /* Comparison function. */
__P((const DBT *, const DBT *));
size_t (*bt_prefix) /* Prefix function. */
__P((const DBT *, const DBT *));
/* Hash access method. */
unsigned int h_ffactor; /* Fill factor. */
unsigned int h_nelem; /* Number of elements. */
u_int32_t (*h_hash) /* Hash function. */
__P((const void *, u_int32_t));
/* Recno access method. */
int re_pad; /* Fixed-length padding byte. */
int re_delim; /* Variable-length delimiting byte. */
u_int32_t re_len; /* Length for fixed-length records. */
char *re_source; /* Source file name. */
/* Values for the flags field; each is a distinct bit. */
#define DB_DELIMITER 0x0001 /* Recno: re_delim set. */
#define DB_DUP 0x0002 /* Btree, Hash: duplicate keys. */
#define DB_FIXEDLEN 0x0004 /* Recno: fixed-length records. */
#define DB_PAD 0x0008 /* Recno: re_pad set. */
#define DB_RECNUM 0x0010 /* Btree: record numbers. */
#define DB_RENUMBER 0x0020 /* Recno: renumber on insert/delete. */
#define DB_SNAPSHOT 0x0040 /* Recno: snapshot the input. */
u_int32_t flags;
};
/*
* DB access method and cursor operation codes. These are implemented as
* bit fields for future flexibility, but currently only a single one may
* be specified to any function.
*/
#define DB_AFTER 0x000001 /* c_put() */
#define DB_APPEND 0x000002 /* put() */
#define DB_BEFORE 0x000004 /* c_put() */
#define DB_CHECKPOINT 0x000008 /* log_put(), log_get() */
#define DB_CURRENT 0x000010 /* c_get(), c_put(), log_get() */
#define DB_FIRST 0x000020 /* c_get(), log_get() */
#define DB_FLUSH 0x000040 /* log_put() */
#define DB_GET_RECNO 0x000080 /* c_get() */
#define DB_KEYFIRST 0x000100 /* c_put() */
#define DB_KEYLAST 0x000200 /* c_put() */
#define DB_LAST 0x000400 /* c_get(), log_get() */
#define DB_NEXT 0x000800 /* c_get(), log_get() */
#define DB_NOOVERWRITE 0x001000 /* put() */
#define DB_NOSYNC 0x002000 /* close() */
#define DB_PREV 0x004000 /* c_get(), log_get() */
#define DB_RECORDCOUNT 0x008000 /* stat() */
#define DB_SET 0x010000 /* c_get(), log_get() */
#define DB_SET_RANGE 0x020000 /* c_get() */
#define DB_SET_RECNO 0x040000 /* get(), c_get() */
/* DB (user visible) error return codes. */
#define DB_INCOMPLETE ( -1) /* Sync didn't finish. */
#define DB_KEYEMPTY ( -2) /* The key/data pair was deleted or
was never created by the user. */
#define DB_KEYEXIST ( -3) /* The key/data pair already exists. */
#define DB_LOCK_DEADLOCK ( -4) /* Locker killed to resolve deadlock. */
#define DB_LOCK_NOTGRANTED ( -5) /* Lock unavailable, no-wait set. */
#define DB_LOCK_NOTHELD ( -6) /* Lock not held by locker. */
#define DB_NOTFOUND ( -7) /* Key/data pair not found (EOF). */
/* DB (private) error return codes. */
#define DB_DELETED ( -8) /* Recovery file marked deleted. */
#define DB_NEEDSPLIT ( -9) /* Page needs to be split. */
#define DB_REGISTERED (-10) /* Entry was previously registered. */
#define DB_SWAPBYTES (-11) /* Database needs byte swapping. */
/*
 * Identifies what an access method locks: a page number together with the
 * DB_FILE_ID_LEN-byte id of the file containing it.
 */
struct __db_ilock { /* Internal DB access method lock. */
db_pgno_t pgno; /* Page being locked. */
/* File id. */
u_int8_t fileid[DB_FILE_ID_LEN];
};
/*
 * DB access method description structure.
 *
 * One handle for an open database: the access method type, the environment
 * and memory pool it uses, its cursor queue and handle-list linkage,
 * logging/locking state, the page size, the table of method function
 * pointers, and a flags word built from the bits defined inline below.
 */
struct __db {
void *mutex; /* Synchronization for free threading */
DBTYPE type; /* DB access method. */
DB_ENV *dbenv; /* DB_ENV structure. */
DB_ENV *mp_dbenv; /* DB_ENV for local mpool creation. */
DB *master; /* Original DB created by db_open. */
void *internal; /* Access method private. */
DB_MPOOL *mp; /* The access method's mpool. */
DB_MPOOLFILE *mpf; /* The access method's mpool file. */
/*
* XXX
* Explicit representations of structures in queue.h.
*
* TAILQ_HEAD(curs_queue, __dbc);
*/
struct {
struct __dbc *tqh_first;
struct __dbc **tqh_last;
} curs_queue;
/*
* XXX
* Explicit representations of structures in queue.h.
*
* LIST_HEAD(handleq, __db);
* LIST_ENTRY(__db);
*/
struct {
struct __db *lh_first;
} handleq; /* List of handles for this DB. */
struct {
struct __db *le_next;
struct __db **le_prev;
} links; /* Links for the handle list. */
u_int32_t log_fileid; /* Logging file id. */
DB_TXN *txn; /* Current transaction. */
u_int32_t locker; /* Default process' locker id. */
DBT lock_dbt; /* DBT referencing lock. */
struct __db_ilock lock; /* Lock. */
size_t pgsize; /* Logical page size of file. */
/* Local heap allocation. */
void *(*db_malloc) __P((size_t));
/* Functions. */
/* Each method takes the DB handle itself as its first argument. */
int (*close) __P((DB *, int));
int (*cursor) __P((DB *, DB_TXN *, DBC **));
int (*del) __P((DB *, DB_TXN *, DBT *, int));
int (*fd) __P((DB *, int *));
int (*get) __P((DB *, DB_TXN *, DBT *, DBT *, int));
int (*put) __P((DB *, DB_TXN *, DBT *, DBT *, int));
int (*stat) __P((DB *, void *, void *(*)(size_t), int));
int (*sync) __P((DB *, int));
/* Values for the flags field; each is a distinct bit. */
#define DB_AM_DUP 0x000001 /* DB_DUP (internal). */
#define DB_AM_INMEM 0x000002 /* In-memory; no sync on close. */
#define DB_AM_LOCKING 0x000004 /* Perform locking. */
#define DB_AM_LOGGING 0x000008 /* Perform logging. */
#define DB_AM_MLOCAL 0x000010 /* Database memory pool is local. */
#define DB_AM_PGDEF 0x000020 /* Page size was defaulted. */
#define DB_AM_RDONLY 0x000040 /* Database is readonly. */
#define DB_AM_RECOVER 0x000080 /* In recovery (do not log or lock). */
#define DB_AM_SWAP 0x000100 /* Pages need to be byte-swapped. */
#define DB_AM_THREAD 0x000200 /* DB is multi-threaded. */
#define DB_BT_RECNUM 0x000400 /* DB_RECNUM (internal) */
#define DB_HS_DIRTYMETA 0x000800 /* Hash: Metadata page modified. */
#define DB_RE_DELIMITER 0x001000 /* DB_DELIMITER (internal). */
#define DB_RE_FIXEDLEN 0x002000 /* DB_FIXEDLEN (internal). */
#define DB_RE_PAD 0x004000 /* DB_PAD (internal). */
#define DB_RE_RENUMBER 0x008000 /* DB_RENUMBER (internal). */
#define DB_RE_SNAPSHOT 0x010000 /* DB_SNAPSHOT (internal). */
u_int32_t flags;
};
/*
 * Cursor description structure.
 *
 * Ties a cursor to its DB handle and transaction, carries the tail-queue
 * linkage (spelled out explicitly instead of using queue.h), and holds
 * the cursor method pointers.
 */
struct __dbc {
DB *dbp; /* Related DB access method. */
DB_TXN *txn; /* Associated transaction. */
/*
* XXX
* Explicit representations of structures in queue.h.
*
* TAILQ_ENTRY(__dbc);
*/
struct {
struct __dbc *tqe_next;
struct __dbc **tqe_prev;
} links;
void *internal; /* Access method private. */
/* Cursor methods; each takes the cursor itself as its first argument. */
int (*c_close) __P((DBC *));
int (*c_del) __P((DBC *, int));
int (*c_get) __P((DBC *, DBT *, DBT *, int));
int (*c_put) __P((DBC *, DBT *, DBT *, int));
};
/*
 * Btree/recno statistics structure.
 *
 * Every member is a 32-bit unsigned value: configuration settings first,
 * then page counts, free-byte totals, and operation counters.
 */
struct __db_bt_stat {
u_int32_t bt_flags; /* Open flags. */
u_int32_t bt_maxkey; /* Maxkey value. */
u_int32_t bt_minkey; /* Minkey value. */
u_int32_t bt_re_len; /* Fixed-length record length. */
u_int32_t bt_re_pad; /* Fixed-length record pad. */
u_int32_t bt_pagesize; /* Page size. */
u_int32_t bt_levels; /* Tree levels. */
u_int32_t bt_nrecs; /* Number of records. */
u_int32_t bt_int_pg; /* Internal pages. */
u_int32_t bt_leaf_pg; /* Leaf pages. */
u_int32_t bt_dup_pg; /* Duplicate pages. */
u_int32_t bt_over_pg; /* Overflow pages. */
u_int32_t bt_free; /* Pages on the free list. */
u_int32_t bt_freed; /* Pages freed for reuse. */
u_int32_t bt_int_pgfree; /* Bytes free in internal pages. */
u_int32_t bt_leaf_pgfree; /* Bytes free in leaf pages. */
u_int32_t bt_dup_pgfree; /* Bytes free in duplicate pages. */
u_int32_t bt_over_pgfree; /* Bytes free in overflow pages. */
u_int32_t bt_pfxsaved; /* Bytes saved by prefix compression. */
u_int32_t bt_split; /* Total number of splits. */
u_int32_t bt_rootsplit; /* Root page splits. */
u_int32_t bt_fastsplit; /* Fast splits. */
u_int32_t bt_added; /* Items added. */
u_int32_t bt_deleted; /* Items deleted. */
u_int32_t bt_get; /* Items retrieved. */
u_int32_t bt_cache_hit; /* Hits in fast-insert code. */
u_int32_t bt_cache_miss; /* Misses in fast-insert code. */
};
#if defined(__cplusplus)
extern "C" {
#endif
int db_appinit __P((const char *, char * const *, DB_ENV *, int));
int db_appexit __P((DB_ENV *));
int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
const char *db_version __P((int *, int *, int *));
#if defined(__cplusplus)
};
#endif
/*******************************************************
* Locking
*******************************************************/
#define DB_LOCKVERSION 1
#define DB_LOCKMAGIC 0x090193
/* Flag values for lock_vec(). */
#define DB_LOCK_NOWAIT 0x01 /* Don't wait on unavailable lock. */
/* Flag values for lock_detect(). */
#define DB_LOCK_CONFLICT 0x01 /* Run on any conflict. */
/*
 * Request types: the operation selector stored in the op field of a
 * DB_LOCKREQ.  No explicit values are given, so numbering starts at 0
 * with DB_LOCK_DUMP.
 */
typedef enum {
DB_LOCK_DUMP, /* Display held locks. */
DB_LOCK_GET, /* Get the lock. */
DB_LOCK_PUT, /* Release the lock. */
DB_LOCK_PUT_ALL, /* Release locker's locks. */
DB_LOCK_PUT_OBJ /* Release locker's locks on obj. */
} db_lockop_t;
/*
 * Simple R/W lock modes and for multi-granularity intention locking.
 *
 * Six modes numbered 0 through 5.
 * NOTE(review): the first three presumably pair with DB_LOCK_RW_N (3) and
 * all six with DB_LOCK_RIW_N (6) -- confirm against the conflict matrices.
 */
typedef enum {
DB_LOCK_NG=0, /* Not granted. */
DB_LOCK_READ, /* Shared/read. */
DB_LOCK_WRITE, /* Exclusive/write. */
DB_LOCK_IREAD, /* Intent to share/read. */
DB_LOCK_IWRITE, /* Intent exclusive/write. */
DB_LOCK_IWR /* Intent to read and write. */
} db_lockmode_t;
/*
 * Lock request structure.
 *
 * One entry in a batch of lock operations: what to do, in which mode, on
 * behalf of which locker and on which object, plus a slot for the lock
 * that is returned.
 */
struct __db_lockreq {
db_lockop_t op; /* Operation. */
db_lockmode_t mode; /* Requested mode. */
u_int32_t locker; /* Locker identity. */
DBT *obj; /* Object being locked. */
DB_LOCK lock; /* Lock returned. */
};
/*
* Commonly used conflict matrices.
*
* Standard Read/Write (or exclusive/shared) locks.
*/
#define DB_LOCK_RW_N 3
extern const u_int8_t db_rw_conflicts[];
/* Multi-granularity locking. */
#define DB_LOCK_RIW_N 6
extern const u_int8_t db_riw_conflicts[];
#if defined(__cplusplus)
extern "C" {
#endif
int lock_close __P((DB_LOCKTAB *));
int lock_detect __P((DB_LOCKTAB *, int, u_int32_t));
int lock_get __P((DB_LOCKTAB *,
u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *));
int lock_id __P((DB_LOCKTAB *, u_int32_t *));
int lock_open __P((const char *, int, int, DB_ENV *, DB_LOCKTAB **));
int lock_put __P((DB_LOCKTAB *, DB_LOCK));
int lock_unlink __P((const char *, int, DB_ENV *));
int lock_vec __P((DB_LOCKTAB *,
u_int32_t, int, DB_LOCKREQ *, int, DB_LOCKREQ **));
#if defined(__cplusplus)
};
#endif
/*******************************************************
* Logging.
*******************************************************/
/* Flag values for log_archive(). */
#define DB_ARCH_ABS 0x001 /* Absolute pathnames. */
#define DB_ARCH_DATA 0x002 /* Data files. */
#define DB_ARCH_LOG 0x004 /* Log files. */
/*
* A DB_LSN has two parts, a fileid which identifies a specific file, and an
* offset within that file. The fileid is an unsigned 4-byte quantity that
* uniquely identifies a file within the log directory -- currently a simple
* counter inside the log. The offset is also an unsigned 4-byte value. The
* log manager guarantees the offset is never more than 4 bytes by switching
* to a new log file before the maximum length imposed by an unsigned 4-byte
* offset is reached.
*/
/* Log sequence number: a (file, offset) pair, both unsigned 32-bit. */
struct __db_lsn {
u_int32_t file; /* File ID. */
u_int32_t offset; /* File offset. */
};
#if defined(__cplusplus)
extern "C" {
#endif
int log_archive __P((DB_LOG *, char **[], int, void *(*)(size_t)));
int log_close __P((DB_LOG *));
int log_compare __P((const DB_LSN *, const DB_LSN *));
int log_file __P((DB_LOG *, const DB_LSN *, char *, size_t));
int log_flush __P((DB_LOG *, const DB_LSN *));
int log_get __P((DB_LOG *, DB_LSN *, DBT *, int));
int log_open __P((const char *, int, int, DB_ENV *, DB_LOG **));
int log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
int log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *));
int log_unlink __P((const char *, int, DB_ENV *));
int log_unregister __P((DB_LOG *, u_int32_t));
#if defined(__cplusplus)
};
#endif
/*******************************************************
* Mpool
*******************************************************/
/* Flag values for memp_fget(). */
#define DB_MPOOL_CREATE 0x001 /* Create a page. */
#define DB_MPOOL_LAST 0x002 /* Return the last page. */
#define DB_MPOOL_NEW 0x004 /* Create a new page. */
/* Flag values for memp_fput(), memp_fset(). */
#define DB_MPOOL_CLEAN 0x001 /* Clear modified bit. */
#define DB_MPOOL_DIRTY 0x002 /* Page is modified. */
#define DB_MPOOL_DISCARD 0x004 /* Don't cache the page. */
/*
 * Mpool statistics structure.
 *
 * Pool-wide figures: the cache size followed by page and hash-table
 * counters.
 */
struct __db_mpool_stat {
size_t st_cachesize; /* Cache size. */
unsigned long st_cache_hit; /* Pages found in the cache. */
unsigned long st_cache_miss; /* Pages not found in the cache. */
unsigned long st_map; /* Pages from mapped files. */
unsigned long st_page_create; /* Pages created in the cache. */
unsigned long st_page_in; /* Pages read in. */
unsigned long st_page_out; /* Pages written out. */
unsigned long st_ro_evict; /* Read-only pages evicted. */
unsigned long st_rw_evict; /* Read-write pages evicted. */
unsigned long st_hash_buckets; /* Number of hash buckets. */
unsigned long st_hash_searches; /* Total hash chain searches. */
unsigned long st_hash_longest; /* Longest hash chain searched. */
unsigned long st_hash_examined; /* Total hash entries searched. */
};
/*
 * Mpool file statistics structure.
 *
 * Per-file figures: the file's name and page size followed by the same
 * page counters that struct __db_mpool_stat keeps.
 */
struct __db_mpool_fstat {
char *file_name; /* File name. */
size_t st_pagesize; /* Page size. */
unsigned long st_cache_hit; /* Pages found in the cache. */
unsigned long st_cache_miss; /* Pages not found in the cache. */
unsigned long st_map; /* Pages from mapped files. */
unsigned long st_page_create; /* Pages created in the cache. */
unsigned long st_page_in; /* Pages read in. */
unsigned long st_page_out; /* Pages written out. */
};
#if defined(__cplusplus)
extern "C" {
#endif
int memp_close __P((DB_MPOOL *));
int memp_fclose __P((DB_MPOOLFILE *));
int memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, unsigned long, void *));
int memp_fopen __P((DB_MPOOL *, const char *,
int, int, int, size_t, int, DBT *, u_int8_t *, DB_MPOOLFILE **));
int memp_fput __P((DB_MPOOLFILE *, void *, unsigned long));
int memp_fset __P((DB_MPOOLFILE *, void *, unsigned long));
int memp_fsync __P((DB_MPOOLFILE *));
int memp_open __P((const char *, int, int, DB_ENV *, DB_MPOOL **));
int memp_register __P((DB_MPOOL *, int,
int (*)(db_pgno_t, void *, DBT *),
int (*)(db_pgno_t, void *, DBT *)));
int memp_stat __P((DB_MPOOL *,
DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t)));
int memp_sync __P((DB_MPOOL *, DB_LSN *));
int memp_unlink __P((const char *, int, DB_ENV *));
#if defined(__cplusplus)
};
#endif
/*******************************************************
* Transactions.
*******************************************************/
#define DB_TXNVERSION 1
#define DB_TXNMAGIC 0x041593
/* Operations values to the tx_recover() function. */
#define DB_TXN_BACKWARD_ROLL 1 /* Read the log backwards. */
#define DB_TXN_FORWARD_ROLL 2 /* Read the log forwards. */
#define DB_TXN_OPENFILES 3 /* Read for open files. */
#define DB_TXN_REDO 4 /* Redo the operation. */
#define DB_TXN_UNDO 5 /* Undo the operation. */
/* Internal transaction status values. */
/* Transaction statistics structure. */
/* Describes one active transaction; used as the element type of st_txnarray. */
struct __db_txn_active {
u_int32_t txnid; /* Transaction ID */
DB_LSN lsn; /* Lsn of the begin record */
};
/*
 * Transaction statistics: checkpoint positions and time, transaction
 * counters, and an array describing the active transactions.
 */
struct __db_txn_stat {
DB_LSN st_last_ckp; /* lsn of the last checkpoint */
DB_LSN st_pending_ckp; /* last checkpoint did not finish */
time_t st_time_ckp; /* time of last checkpoint */
u_int32_t st_last_txnid; /* last transaction id given out */
u_int32_t st_maxtxns; /* maximum number of active txns */
u_int32_t st_naborts; /* number of aborted transactions */
u_int32_t st_nbegins; /* number of begun transactions */
u_int32_t st_ncommits; /* number of committed transactions */
u_int32_t st_nactive; /* number of active transactions */
/* NOTE(review): presumably st_nactive entries long -- confirm. */
DB_TXN_ACTIVE *st_txnarray; /* array of active transactions */
};
#if defined(__cplusplus)
extern "C" {
#endif
int txn_abort __P((DB_TXN *));
int txn_begin __P((DB_TXNMGR *, DB_TXN *, DB_TXN **));
int txn_checkpoint __P((const DB_TXNMGR *, long, long));
int txn_commit __P((DB_TXN *));
int txn_close __P((DB_TXNMGR *));
u_int32_t txn_id __P((DB_TXN *));
int txn_open __P((const char *, int, int, DB_ENV *, DB_TXNMGR **));
int txn_prepare __P((DB_TXN *));
int txn_stat __P((DB_TXNMGR *, DB_TXN_STAT **, void *(*)(size_t)));
int txn_unlink __P((const char *, int, DB_ENV *));
#if defined(__cplusplus)
};
#endif
#ifdef DB_DBM_HSEARCH
/*******************************************************
* Dbm/Ndbm historic interfaces.
*******************************************************/
#define DBM_INSERT 0 /* Flags to dbm_store(). */
#define DBM_REPLACE 1
/*
* The db(3) support for ndbm(3) always appends this suffix to the
* file name to avoid overwriting the user's original database.
*/
#define DBM_SUFFIX ".db"
/*
 * Item type passed to and returned by the dbm/ndbm historic interfaces.
 * NOTE(review): dsize is presumably the byte length of dptr -- confirm.
 */
typedef struct {
char *dptr;
int dsize;
} datum;
/*
 * Historic dbm(3) and ndbm(3) entry points.
 *
 * The declarations get C linkage when this header is compiled as C++.
 * The linkage block is closed with a plain "}" -- a trailing ";" there is
 * an empty declaration that pedantic C++ compilers diagnose.
 */
#if defined(__cplusplus)
extern "C" {
#endif
int	 dbminit __P((char *));
#if !defined(__cplusplus)
/* Hidden from C++ because "delete" is a keyword there. */
int	 delete __P((datum));
#endif
datum	 fetch __P((datum));
datum	 firstkey __P((void));
datum	 nextkey __P((datum));
int	 store __P((datum, datum));

/*
 * !!!
 * Don't prototype:
 *
 *	 dbm_clearerr(DBM *db);
 *	 dbm_dirfno(DBM *db);
 *	 dbm_error(DBM *db);
 *	 dbm_pagfno(DBM *db);
 *	 dbm_rdonly(DBM *db);
 *
 * they weren't documented and were historically implemented as #define's.
 */
void	 dbm_close __P((DBM *));
int	 dbm_delete __P((DBM *, datum));
datum	 dbm_fetch __P((DBM *, datum));
datum	 dbm_firstkey __P((DBM *));
long	 dbm_forder __P((DBM *, datum));
datum	 dbm_nextkey __P((DBM *));
DBM	*dbm_open __P((const char *, int, int));
int	 dbm_store __P((DBM *, datum, datum, int));
#if defined(__cplusplus)
}
#endif
/*******************************************************
* Hsearch historic interface.
*******************************************************/
/* Operation selector passed as the second argument of hsearch(). */
typedef enum {
FIND, ENTER
} ACTION;
/* Key/data pair passed by value to hsearch(), which returns an ENTRY *. */
typedef struct entry {
char *key; /* presumably a NUL-terminated lookup key -- confirm */
void *data; /* caller-defined payload associated with key */
} ENTRY;
/*
 * Historic hsearch(3) entry points.
 *
 * The declarations get C linkage when this header is compiled as C++.
 * The linkage block is closed with a plain "}" -- a trailing ";" there is
 * an empty declaration that pedantic C++ compilers diagnose.
 */
#if defined(__cplusplus)
extern "C" {
#endif
int	 hcreate __P((unsigned int));
void	 hdestroy __P((void));
ENTRY	*hsearch __P((ENTRY, ACTION));
#if defined(__cplusplus)
}
#endif
#endif /* DB_DBM_HSEARCH */
/*
* XXX
* MacOS: Reset Metrowerks C enum sizes.
*/
#ifdef __MWERKS__
#pragma enumsalwaysint reset
#endif
#endif /* !_DB_H_ */

818
db2/db/db.c Normal file
View File

@ -0,0 +1,818 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db.c 10.37 (Sleepycat) 8/23/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "shqueue.h"
#include "db_page.h"
#include "db_shash.h"
#include "db_swap.h"
#include "btree.h"
#include "hash.h"
#include "mp.h"
#include "db_am.h"
#include "common_ext.h"
/*
 * File-local handle methods; db_open() installs them on each new handle
 * as dbp->close and dbp->fd.
 */
static int db_close __P((DB *, int));
static int db_fd __P((DB *, int *));
/*
 * DBINFO_FCHK --
 *	Reconcile one flag between the file's meta-data page and the handle.
 *
 * If the metadata page has the flag set, set the local flag.  If the page
 * does NOT have the flag set, return EINVAL if the user's dbinfo argument
 * caused us to already set the local flag.
 *
 *	dbp		handle whose flags are tested/updated
 *	fn		flag name, used only in the error message
 *	meta_flags	flags word read from the meta-data page
 *	m_name		meta-data flag bit to test
 *	dbp_name	corresponding handle flag bit
 *
 * The macro is not self-contained: it uses the variables `dbenv' and
 * `fname' and jumps to the label `einval', all of which must exist in the
 * calling function.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement and is safe as the unbraced body
 * of an if/else.
 */
#define	DBINFO_FCHK(dbp, fn, meta_flags, m_name, dbp_name) do {		\
	if ((meta_flags) & (m_name))					\
		F_SET(dbp, dbp_name);					\
	else if (F_ISSET(dbp, dbp_name)) {				\
		__db_err(dbenv,						\
	    "%s: %s specified in dbinfo argument but not set in file",	\
		    fname, fn);						\
		goto einval;						\
	}								\
} while (0)
/*
 * db_open --
 *	Main library interface to the DB access methods.
 *
 * Parameters:
 *	fname	Name of the backing file.  NULL or an empty string requests
 *		a database with no backing file (DB_AM_INMEM is set).
 *	type	DB_BTREE, DB_HASH, DB_RECNO or DB_UNKNOWN.  DB_UNKNOWN is
 *		only accepted when an existing, non-empty file supplies the
 *		type through its meta-data page.
 *	flags	Validated with __db_fchk() against OKFLAGS.
 *	mode	File creation mode; 0 selects read/write for user and group.
 *	dbenv	Optional environment, may be NULL.
 *	dbinfo	Optional access-method parameters, may be NULL.
 *	dbpp	Receives the new handle on success; untouched on failure.
 *
 * Returns 0 on success.  On failure returns ENOMEM, EINVAL, or whatever
 * error a helper routine returned, after releasing everything that was
 * acquired along the way (file descriptor, log file id, memory pool file,
 * local memory pool, private DB_ENV, mutex, real file name and the handle
 * itself).
 */
int
db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
	const char *fname;
	DBTYPE type;
	int flags, mode;
	DB_ENV *dbenv;
	DB_INFO *dbinfo;
	DB **dbpp;
{
	BTMETA *btm;
	DB *dbp;
	DBT pgcookie;
	DB_ENV *envp, t_dbenv;
	DB_PGINFO pginfo;
	HASHHDR *hashm;
	off_t io;
	size_t cachesize;
	ssize_t nr;
	int fd, ftype, need_fileid, restore, ret, retry_cnt, swapped;
	char *real_name, mbuf[512];

	/* Validate arguments. */
#ifdef HAVE_SPINLOCKS
#define	OKFLAGS	(DB_CREATE | DB_NOMMAP | DB_RDONLY | DB_THREAD | DB_TRUNCATE)
#else
#define	OKFLAGS	(DB_CREATE | DB_NOMMAP | DB_RDONLY | DB_TRUNCATE)
#endif
	if ((ret = __db_fchk(dbenv, "db_open", flags, OKFLAGS)) != 0)
		return (ret);

	/* Initialize for error return. */
	fd = -1;
	need_fileid = 1;
	real_name = NULL;

	/* Allocate the DB structure, reference the DB_ENV structure. */
	if ((dbp = (DB *)calloc(1, sizeof(DB))) == NULL) {
		__db_err(dbenv, "%s", strerror(ENOMEM));
		return (ENOMEM);
	}
	dbp->dbenv = dbenv;

	/* Convert the dbinfo flags. */
	if (dbinfo != NULL) {
		/*
		 * !!!
		 * We can't check for illegal flags until we know what type
		 * of open we're doing.
		 */
		if (F_ISSET(dbinfo, DB_DELIMITER))
			F_SET(dbp, DB_RE_DELIMITER);
		if (F_ISSET(dbinfo, DB_DUP))
			F_SET(dbp, DB_AM_DUP);
		if (F_ISSET(dbinfo, DB_FIXEDLEN))
			F_SET(dbp, DB_RE_FIXEDLEN);
		if (F_ISSET(dbinfo, DB_PAD))
			F_SET(dbp, DB_RE_PAD);
		if (F_ISSET(dbinfo, DB_RECNUM))
			F_SET(dbp, DB_BT_RECNUM);
		if (F_ISSET(dbinfo, DB_RENUMBER))
			F_SET(dbp, DB_RE_RENUMBER);
		if (F_ISSET(dbinfo, DB_SNAPSHOT))
			F_SET(dbp, DB_RE_SNAPSHOT);
	}

	/* Set based on the open(2) flags. */
	if (LF_ISSET(DB_RDONLY))
		F_SET(dbp, DB_AM_RDONLY);

	/* Check threading fields. */
	if (LF_ISSET(DB_THREAD)) {
		if ((dbp->mutex =
		    (db_mutex_t *)malloc(sizeof(db_mutex_t))) == NULL) {
			__db_err(dbenv, "%s", strerror(ENOMEM));
			ret = ENOMEM;
			goto err;
		}
		__db_mutex_init(dbp->mutex, 0);

		F_SET(dbp, DB_AM_THREAD);
	}

	/*
	 * Always set the master and initialize the queues, so we can
	 * use these fields without checking the thread bit.
	 */
	dbp->master = dbp;
	LIST_INIT(&dbp->handleq);
	LIST_INSERT_HEAD(&dbp->handleq, dbp, links);
	TAILQ_INIT(&dbp->curs_queue);

	/*
	 * Set based on the dbenv fields, although no logging or transactions
	 * are possible for temporary files.
	 */
	if (dbp->dbenv != NULL) {
		if (dbenv->lk_info != NULL)
			F_SET(dbp, DB_AM_LOCKING);
		if (fname != NULL && dbenv->lg_info != NULL)
			F_SET(dbp, DB_AM_LOGGING);
	}

	/* Set the common fields. */
	if (dbinfo == NULL) {
		dbp->pgsize = 0;
		dbp->db_malloc = NULL;
	} else {
		dbp->pgsize = dbinfo->db_pagesize;
		dbp->db_malloc = dbinfo->db_malloc;
	}

	/* Fill in the default file mode. */
	if (mode == 0)
		mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;

	/* Check if the user wants us to swap byte order. */
	if (dbinfo != NULL)
		switch (ret = __db_byteorder(dbenv, dbinfo->db_lorder)) {
		case 0:
			break;
		case DB_SWAPBYTES:
			F_SET(dbp, DB_AM_SWAP);
			break;
		default:
			goto err;
		}

	/*
	 * If we have a file name, try and read the first page, figure out
	 * what type of file it is, and initialize everything we can based
	 * on that file's meta-data page.
	 *
	 * XXX
	 * We don't actually expect zero-length strings as arguments.  We
	 * do the check, permitting them, because scripting languages, e.g.,
	 * the Tcl test suite, doesn't know anything about passing NULL's.
	 */
	if (fname != NULL && fname[0] != '\0') {
		/* Get the real file name. */
		if ((ret = __db_appname(dbenv,
		    DB_APP_DATA, NULL, fname, NULL, &real_name)) != 0)
			goto err;

		/*
		 * Open the backing file.  We need to make sure that multiple
		 * processes attempting to create the file at the same time
		 * are properly ordered so that only one of them creates the
		 * "unique" file id, so we open it O_EXCL and O_CREAT so two
		 * simultaneous attempts to create the region will return
		 * failure in one of the attempts.  If we're one of the ones
		 * that fail, we simply retry without the O_CREAT flag, which
		 * will require that the meta-data page exist.
		 */
#undef	OKFLAGS
#define	OKFLAGS								\
	(DB_CREATE | DB_NOMMAP | DB_RDONLY | DB_THREAD | DB_TRUNCATE)
		retry_cnt = 0;
open_retry:	if (LF_ISSET(DB_CREATE)) {
			if ((ret = __db_fdopen(real_name, flags | DB_EXCL,
			    OKFLAGS | DB_EXCL, mode, &fd)) != 0) {
				if (ret == EEXIST) {
					/* Lost the race: open the existing file. */
					LF_CLR(DB_CREATE);
					goto open_retry;
				} else {
					__db_err(dbenv,
					    "%s: %s", fname, strerror(ret));
					goto err;
				}
			}
		} else {
			if ((ret = __db_fdopen(real_name,
			    flags, OKFLAGS, mode, &fd)) != 0) {
				__db_err(dbenv, "%s: %s", fname, strerror(ret));
				goto err;
			}
		}

		/*
		 * Use the optimum I/O size as the pagesize if a pagesize not
		 * specified.  Some filesystems have 64K as their optimum I/O
		 * size, but as that results in impossibly large default cache
		 * sizes, we limit the default pagesize to 16K.
		 */
		if (dbp->pgsize == 0) {
			if ((ret = __db_stat(dbp->dbenv,
			    real_name, fd, NULL, &io)) != 0)
				goto err;
			if (io < 512)
				io = 512;
			if (io > 16 * 1024)
				io = 16 * 1024;
			dbp->pgsize = io;
			F_SET(dbp, DB_AM_PGDEF);
		}

		/*
		 * Try and read the first disk sector -- this code assumes
		 * that the meta-data for all access methods fits in 512
		 * bytes, and that no database will be smaller than that.
		 */
		if ((ret = __db_read(fd, mbuf, sizeof(mbuf), &nr)) != 0)
			goto err;

		/* The fd is no longer needed. */
		(void)__db_close(fd);
		fd = -1;

		if (nr != sizeof(mbuf)) {
			if (nr != 0) {
				__db_err(dbenv,
				    "%s: unexpected file format", fname);
				goto einval;
			}
			/*
			 * The only way we can reach here with the DB_CREATE
			 * flag set is if we created the file.  If we didn't
			 * create the file, there's a chance that someone else
			 * is busily doing so.  Sleep and give them a chance,
			 * because we need the metadata page they're going to
			 * write.
			 */
			if (!LF_ISSET(DB_CREATE) && retry_cnt++ < 3) {
				__db_sleep(1, 0);
				goto open_retry;
			}
			if (type == DB_UNKNOWN) {
				__db_err(dbenv,
				    "%s: DBTYPE of unknown with empty file",
				    fname);
				goto einval;
			}
			goto empty;
		}

		/*
		 * A found file overrides some user information.  We'll check
		 * for possible error conditions based on conflicts between
		 * the file and the user's arguments below.
		 */
		swapped = 0;
		F_CLR(dbp, DB_AM_SWAP);

retry:		switch (((BTMETA *)mbuf)->magic) {
		case DB_BTREEMAGIC:
			if (type != DB_BTREE &&
			    type != DB_RECNO && type != DB_UNKNOWN)
				goto einval;

			btm = (BTMETA *)mbuf;
			if (swapped && (ret = __bam_mswap((PAGE *)btm)) != 0)
				goto err;

			if (btm->version < DB_BTREEOLDVER ||
			    btm->version > DB_BTREEVERSION) {
				__db_err(dbenv,
				    "%s: unsupported btree version number %lu",
				    fname, (u_long)btm->version);
				goto einval;
			}
			dbp->pgsize = btm->pagesize;
			F_CLR(dbp, DB_AM_PGDEF);

			if ((ret = __db_fchk(dbenv,
			    "db_open", btm->flags, BTM_MASK)) != 0)
				goto err;
			DBINFO_FCHK(dbp, "DB_DUP",
			    btm->flags, BTM_DUP, DB_AM_DUP);
			if (F_ISSET(btm, BTM_RECNO)) {
				DBINFO_FCHK(dbp, "DB_FIXEDLEN",
				    btm->flags, BTM_FIXEDLEN, DB_RE_FIXEDLEN);
				DBINFO_FCHK(dbp, "DB_RENUMBER",
				    btm->flags, BTM_RENUMBER, DB_RE_RENUMBER);
				type = DB_RECNO;
			} else {
				DBINFO_FCHK(dbp, "DB_RECNUM",
				    btm->flags, BTM_RECNUM, DB_BT_RECNUM);
				type = DB_BTREE;
			}

			/* Copy the file's unique id. */
			need_fileid = 0;
			memcpy(dbp->lock.fileid, btm->uid, DB_FILE_ID_LEN);
			break;
		case DB_HASHMAGIC:
			if (type != DB_HASH && type != DB_UNKNOWN)
				goto einval;

			hashm = (HASHHDR *)mbuf;
			if (swapped && (ret = __ham_mswap((PAGE *)hashm)) != 0)
				goto err;

			if (hashm->version < DB_HASHOLDVER ||
			    hashm->version > DB_HASHVERSION) {
				/*
				 * Cast to match %lu, exactly as the btree
				 * branch above does; passing the field
				 * uncast mismatches the conversion.
				 */
				__db_err(dbenv,
				    "%s: unsupported hash version number %lu",
				    fname, (u_long)hashm->version);
				goto einval;
			}
			dbp->pgsize = hashm->pagesize;
			F_CLR(dbp, DB_AM_PGDEF);

			if ((ret = __db_fchk(dbenv,
			    "db_open", hashm->flags, DB_HASH_DUP)) != 0)
				goto err;
			DBINFO_FCHK(dbp, "DB_DUP",
			    hashm->flags, DB_HASH_DUP, DB_AM_DUP);
			type = DB_HASH;

			/* Copy the file's unique id. */
			need_fileid = 0;
			memcpy(dbp->lock.fileid, hashm->uid, DB_FILE_ID_LEN);
			break;
		default:
			/*
			 * Unknown magic number: swap it once and look again,
			 * in case the file has the other byte order.
			 */
			if (swapped) {
				__db_err(dbenv, "unrecognized file type");
				goto einval;
			}
			M_32_SWAP(((BTMETA *)mbuf)->magic);
			F_SET(dbp, DB_AM_SWAP);

			swapped = 1;
			goto retry;
		}
	} else {
		fname = real_name = NULL;

		if (type == DB_UNKNOWN) {
			__db_err(dbenv,
			    "DBTYPE of unknown without existing file");
			goto einval;
		}
		F_SET(dbp, DB_AM_INMEM);
	}

empty:	/*
	 * By the time we get here we've either set the type or we're taking
	 * it from the user.
	 */
	dbp->type = type;

	/*
	 * Set the page size to the best value for I/O to this file.  Don't
	 * overflow the page offset type.  The page size must be db_indx_t
	 * aligned and >= MIN_PAGE_SIZE.
	 *
	 * XXX
	 * Should we be checking for a page size that's not a multiple of 512?
	 */
	if (dbp->pgsize == 0) {
		F_SET(dbp, DB_AM_PGDEF);
		dbp->pgsize = 8 * 1024;
	}
	if (dbp->pgsize < DB_MIN_PGSIZE ||
	    dbp->pgsize > DB_MAX_PGSIZE ||
	    dbp->pgsize & (sizeof(db_indx_t) - 1)) {
		__db_err(dbenv, "illegal page size");
		goto einval;
	}

	/*
	 * Set and/or correct the cache size; must be a multiple of the
	 * page size.
	 */
	if (dbinfo == NULL || dbinfo->db_cachesize == 0)
		cachesize = dbp->pgsize * DB_MINCACHE;
	else {
		cachesize = dbinfo->db_cachesize;
		if (cachesize & (dbp->pgsize - 1))
			cachesize += (~cachesize & (dbp->pgsize - 1)) + 1;
		if (cachesize < dbp->pgsize * DB_MINCACHE)
			cachesize = dbp->pgsize * DB_MINCACHE;
		if (cachesize < 20 * 1024)
			cachesize = 20 * 1024;
	}

	/*
	 * If no mpool supplied by the application, attach to a local,
	 * created buffer pool.
	 *
	 * XXX
	 * If the user has a DB_ENV structure, we have to use a temporary
	 * one so that we don't step on their values.  If the user doesn't,
	 * we have to create one, and keep it around until the call to the
	 * memp_close() function.  This is all so the mpool functions get
	 * the error stuff right.
	 */
	if (dbenv == NULL || dbenv->mp_info == NULL) {
		F_SET(dbp, DB_AM_MLOCAL);

		if (dbenv == NULL) {
			if ((dbp->mp_dbenv =
			    (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) {
				ret = ENOMEM;
				goto err;
			}

			envp = dbp->mp_dbenv;
			restore = 0;
		} else {
			t_dbenv = *dbenv;

			envp = dbenv;
			restore = 1;
		}
		envp->mp_size = cachesize;
		F_SET(envp, DB_MPOOL_PRIVATE);
		ret = memp_open(NULL,
		    DB_CREATE, S_IRUSR | S_IWUSR, envp, &dbp->mp);

		/*
		 * Put the caller's DB_ENV back whether or not the open
		 * worked, so a failed open does not leave our temporary
		 * mp_size and DB_MPOOL_PRIVATE settings in their structure.
		 */
		if (restore)
			*dbenv = t_dbenv;
		if (ret != 0)
			goto err;
	} else
		dbp->mp = dbenv->mp_info;

	/* Register DB's pgin/pgout functions. */
	if ((ret = memp_register(dbp->mp,
	    DB_FTYPE_BTREE, __bam_pgin, __bam_pgout)) != 0)
		goto err;
	if ((ret = memp_register(dbp->mp,
	    DB_FTYPE_HASH, __ham_pgin, __ham_pgout)) != 0)
		goto err;

	/*
	 * If we don't already have one, get a unique file ID.  If the file
	 * is a temporary file, then we have to create a unique file ID --
	 * no backing file will be created until the mpool cache is filled
	 * forcing it to go to disk.  The created ID must never match any
	 * potential real file ID -- we know it won't because real file IDs
	 * contain a time stamp after the dev/ino pair, and we're simply
	 * storing a 4-byte locker ID.
	 *
	 * XXX
	 * Store the file id in the locker structure -- we can get it from
	 * there as necessary, and it saves having two copies.
	 */
	if (need_fileid) {
		if (fname == NULL) {
			memset(dbp->lock.fileid, 0, DB_FILE_ID_LEN);
			if (F_ISSET(dbp, DB_AM_LOCKING) &&
			    (ret = lock_id(dbenv->lk_info,
			    (u_int32_t *)dbp->lock.fileid)) != 0)
				goto err;
		} else {
			if ((ret = __db_fileid(dbenv,
			    real_name, 1, dbp->lock.fileid)) != 0)
				goto err;
		}
	}

	/* No further use for the real name. */
	if (real_name != NULL)
		FREES(real_name);
	real_name = NULL;

	/*
	 * Open a backing file in the memory pool.
	 *
	 * If we need to process the file's pages on I/O, set the file type.
	 * If it's a hash file, always call pgin and pgout routines.  This
	 * means that hash files can never be mapped into process memory.  If
	 * it's a btree file and requires swapping, we need to page the file
	 * in and out.  This has to be right -- we can't mmap files that are
	 * being paged in and out.
	 */
	if (type == DB_HASH)
		ftype = DB_FTYPE_HASH;
	else
		ftype = F_ISSET(dbp, DB_AM_SWAP) ? DB_FTYPE_BTREE : 0;
	pginfo.db_pagesize = dbp->pgsize;
	pginfo.needswap = F_ISSET(dbp, DB_AM_SWAP);
	pgcookie.data = &pginfo;
	pgcookie.size = sizeof(DB_PGINFO);

	if ((ret = memp_fopen(dbp->mp, fname, ftype,
	    F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0, 0, dbp->pgsize,
	    0, &pgcookie, dbp->lock.fileid, &dbp->mpf)) != 0)
		goto err;

	/* Get a log file id. */
	if (F_ISSET(dbp, DB_AM_LOGGING) &&
	    (ret = log_register(dbenv->lg_info,
	    dbp, fname, type, &dbp->log_fileid)) != 0)
		goto err;

	/*
	 * Get a locker id for this DB, and build the lock cookie: the first
	 * db_pgno_t bytes are the page number, the next N bytes are the file
	 * id.
	 */
	if (F_ISSET(dbp, DB_AM_LOCKING)) {
		if ((ret = lock_id(dbenv->lk_info, &dbp->locker)) != 0)
			goto err;
		dbp->lock_dbt.size = sizeof(dbp->lock);
		dbp->lock_dbt.data = &dbp->lock;
	}

	/* Call the real open function. */
	switch (type) {
	case DB_BTREE:
		if (dbinfo != NULL && (ret = __db_fchk(dbenv,
		    "db_open", dbinfo->flags, DB_RECNUM | DB_DUP)) != 0)
			goto err;
		if (dbinfo != NULL && (ret = __db_fcchk(dbenv,
		    "db_open", dbinfo->flags, DB_DUP, DB_RECNUM)) != 0)
			goto err;
		if ((ret = __bam_open(dbp, type, dbinfo)) != 0)
			goto err;
		break;
	case DB_HASH:
		if (dbinfo != NULL && (ret = __db_fchk(dbenv,
		    "db_open", dbinfo->flags, DB_DUP)) != 0)
			goto err;
		if ((ret = __ham_open(dbp, dbinfo)) != 0)
			goto err;
		break;
	case DB_RECNO:
#define	DB_INFO_FLAGS \
	(DB_DELIMITER | DB_FIXEDLEN | DB_PAD | DB_RENUMBER | DB_SNAPSHOT)
		if (dbinfo != NULL && (ret = __db_fchk(dbenv,
		    "db_open", dbinfo->flags, DB_INFO_FLAGS)) != 0)
			goto err;
		if ((ret = __ram_open(dbp, type, dbinfo)) != 0)
			goto err;
		break;
	default:
		abort();
	}

	/* Call a local close routine. */
	dbp->close = db_close;
	dbp->fd = db_fd;

	*dbpp = dbp;
	return (0);

einval:	ret = EINVAL;
err:	/* Close the file descriptor. */
	if (fd != -1)
		(void)__db_close(fd);

	/* Discard the log file id. */
	if (dbp->log_fileid != 0)
		(void)log_unregister(dbenv->lg_info, dbp->log_fileid);

	/* Close the memory pool file. */
	if (dbp->mpf != NULL)
		(void)memp_fclose(dbp->mpf);

	/* If the memory pool was local, close it. */
	if (F_ISSET(dbp, DB_AM_MLOCAL) && dbp->mp != NULL)
		(void)memp_close(dbp->mp);

	/* If we allocated a DB_ENV, discard it. */
	if (dbp->mp_dbenv != NULL)
		FREE(dbp->mp_dbenv, sizeof(DB_ENV));

	/*
	 * Discard the mutex allocated for DB_THREAD, as db_close() does;
	 * without this it is lost whenever a threaded open fails.
	 */
	if (dbp->mutex != NULL)
		FREE(dbp->mutex, sizeof(db_mutex_t));

	if (real_name != NULL)
		FREES(real_name);
	if (dbp != NULL)
		FREE(dbp, sizeof(DB));

	return (ret);
}
/*
 * db_close --
 *	Close a DB tree.
 *
 * Syncs the file (unless DB_NOSYNC is given), closes every cursor and
 * every handle on the handle queue through the access-method close
 * routine, syncs and closes the memory pool file, closes a local memory
 * pool, then releases the mutex, log file id, private DB_ENV and all
 * handle structures.
 *
 * Every step is attempted even after a failure; the value returned is
 * the first non-zero error seen, or 0.
 */
static int
db_close(dbp, flags)
	DB *dbp;
	int flags;
{
	DBC *cursor;
	DB *handle;
	int ret, t_ret;

	ret = 0;

	/* Sync the underlying file. */
	if (!LF_ISSET(DB_NOSYNC)) {
		t_ret = dbp->sync(dbp, 0);
		if (t_ret != 0 && ret == 0)
			ret = t_ret;
	}

	/*
	 * Call the underlying access method close routine for all the
	 * cursors and handles.
	 */
	handle = LIST_FIRST(&dbp->handleq);
	while (handle != NULL) {
		/* Closing a cursor removes it, so keep taking the head. */
		for (cursor = TAILQ_FIRST(&handle->curs_queue);
		    cursor != NULL;
		    cursor = TAILQ_FIRST(&handle->curs_queue)) {
			t_ret = cursor->c_close(cursor);
			if (t_ret != 0 && ret == 0)
				ret = t_ret;
		}

		switch (handle->type) {
		case DB_BTREE:
			t_ret = __bam_close(handle);
			break;
		case DB_HASH:
			t_ret = __ham_close(handle);
			break;
		case DB_RECNO:
			t_ret = __ram_close(handle);
			break;
		default:
			abort();
		}
		if (t_ret != 0 && ret == 0)
			ret = t_ret;

		handle = LIST_NEXT(handle, links);
	}

	/* Sync the memory pool. */
	t_ret = memp_fsync(dbp->mpf);
	if (t_ret != 0 && ret == 0)
		ret = t_ret;

	/* Close the memory pool file. */
	t_ret = memp_fclose(dbp->mpf);
	if (t_ret != 0 && ret == 0)
		ret = t_ret;

	/* If the memory pool was local, close it. */
	if (F_ISSET(dbp, DB_AM_MLOCAL)) {
		t_ret = memp_close(dbp->mp);
		if (t_ret != 0 && ret == 0)
			ret = t_ret;
	}

	/* Discard the mutex. */
	if (dbp->mutex != NULL)
		FREE(dbp->mutex, sizeof(db_mutex_t));

	/* Discard the log file id. */
	if (F_ISSET(dbp, DB_AM_LOGGING))
		(void)log_unregister(dbp->dbenv->lg_info, dbp->log_fileid);

	/* Discard the lock cookie for all handles. */
	handle = LIST_FIRST(&dbp->handleq);
	while (handle != NULL) {
		if (F_ISSET(handle, DB_AM_LOCKING)) {
#ifdef DEBUG
			DB_LOCKREQ request;

			/*
			 * If we're running tests, display any locks currently
			 * held.  It's possible that some applications may hold
			 * locks for long periods, e.g., conference room locks,
			 * but the DB tests should never close holding locks.
			 */
			request.op = DB_LOCK_DUMP;
			t_ret = lock_vec(handle->dbenv->lk_info,
			    handle->locker, 0, &request, 1, NULL);
			if (t_ret != 0 && ret == 0)
				ret = EAGAIN;
#endif
		}
		handle = LIST_NEXT(handle, links);
	}

	/* If we allocated a DB_ENV, discard it. */
	if (dbp->mp_dbenv != NULL)
		FREE(dbp->mp_dbenv, sizeof(DB_ENV));

	/* Free all of the DB's. */
	LIST_REMOVE(dbp, links);
	for (handle = LIST_FIRST(&dbp->handleq);
	    handle != NULL; handle = LIST_FIRST(&dbp->handleq)) {
		LIST_REMOVE(handle, links);
		FREE(handle, sizeof(*handle));
	}
	FREE(dbp, sizeof(*dbp));

	return (ret);
}
/*
 * db_fd --
 *	Return a file descriptor for flock'ing.
 *
 * Stores the descriptor of the memory pool file in *fdp.  Returns 0 on
 * success, or ENOENT when the database is in-memory or the stored
 * descriptor is -1.
 */
static int
db_fd(dbp, fdp)
	DB *dbp;
	int *fdp;
{
	/* In-memory database can't have a file descriptor. */
	if (F_ISSET(dbp, DB_AM_INMEM))
		return (ENOENT);

	/*
	 * XXX
	 * Truly spectacular layering violation.  As we don't open the
	 * underlying file until we need it, it may not be initialized.
	 */
	*fdp = dbp->mpf->fd;
	return (*fdp == -1 ? ENOENT : 0);
}
/*
* __db_pgerr --
* Error when unable to retrieve a specified page.
*
* PUBLIC: int __db_pgerr __P((DB *, db_pgno_t));
*/
int
__db_pgerr(dbp, pgno)
DB *dbp;
db_pgno_t pgno;
{
__db_err(dbp->dbenv,
"unable to create/retrieve page %lu", (u_long)pgno);
return (__db_panic(dbp));
}
/*
* __db_pgfmt --
* Error when a page has the wrong format.
*
* PUBLIC: int __db_pgfmt __P((DB *, db_pgno_t));
*/
int
__db_pgfmt(dbp, pgno)
DB *dbp;
db_pgno_t pgno;
{
__db_err(dbp->dbenv,
"page %lu: illegal page type or format", (u_long)pgno);
return (__db_panic(dbp));
}

154
db2/db/db.src Normal file
View File

@ -0,0 +1,154 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
* @(#)db.src 10.3 (Sleepycat) 8/18/97
*/
#include "config.h"
PREFIX db
/*
 * addrem -- Add or remove an entry from a duplicate page.
 *
 * opcode: identifies if this is an add or delete.
 * fileid: file identifier of the file being modified.
 * pgno: duplicate page number.
 * indx: location at which to insert or delete.
 * nbytes: number of bytes added/removed to/from the page.
 * hdr: header for the data item.
 * dbt: data that is deleted or is to be added.
 * pagelsn: former lsn of the page.
 *
 * If the hdr was NULL then, the dbt is a regular B_KEYDATA.
 * If the dbt was NULL then the hdr is a complete item to be
 * pasted on the page.
 *
 * NOTE(review): each field line in a record appears to read
 * "<kind> <name> <C type> <print format>" -- confirm against the
 * script that generates db_auto.c from this file.
 */
BEGIN addrem
ARG opcode u_int32_t lu
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
ARG indx u_int32_t lu
ARG nbytes size_t lu
DBT hdr DBT s
DBT dbt DBT s
POINTER pagelsn DB_LSN * lu
END
/*
 * split -- Handles the split of a duplicate page.
 *
 * opcode: defines whether we are splitting from or splitting onto
 *	the page.
 * fileid: file identifier of the file being modified.
 * pgno: page number being split.
 * pageimage: entire page contents.
 * pagelsn: former lsn of the page.
 */
BEGIN split
ARG opcode u_int32_t lu
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
DBT pageimage DBT s
POINTER pagelsn DB_LSN * lu
END
/*
 * big -- Handles addition and deletion of big key/data items.
 *
 * opcode: identifies get/put.
 * fileid: file identifier of the file being modified.
 * pgno: page onto which data is being added/removed.
 * prev_pgno: the page before the one we are logging.
 * next_pgno: the page after the one we are logging.
 * dbt: data being written onto the page.
 * pagelsn: former lsn of the orig_page (no field of that name exists;
 *	presumably the page identified by pgno -- confirm).
 * prevlsn: former lsn of the prev_pgno.
 * nextlsn: former lsn of the next_pgno.  This is not currently used, but
 *	may be used later if we actually do overwrites of big key/
 *	data items in place.
 */
BEGIN big
ARG opcode u_int32_t lu
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
ARG prev_pgno db_pgno_t lu
ARG next_pgno db_pgno_t lu
DBT dbt DBT s
POINTER pagelsn DB_LSN * lu
POINTER prevlsn DB_LSN * lu
POINTER nextlsn DB_LSN * lu
END
/*
 * ovref -- Handles increment of overflow page reference count.
 *
 * fileid: identifies the file being modified.
 * pgno: page number being incremented.
 * lsn: the page's original lsn.
 */
BEGIN ovref
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
END
/*
 * relink -- Handles relinking around a page.
 *
 * fileid: identifies the file being modified.
 * pgno: the page being changed.
 * lsn: the page's original lsn.
 * prev: the previous page.
 * lsn_prev: the previous page's original lsn.
 * next: the next page.
 * lsn_next: the next page's original lsn.
 */
BEGIN relink
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG prev db_pgno_t lu
POINTER lsn_prev DB_LSN * lu
ARG next db_pgno_t lu
POINTER lsn_next DB_LSN * lu
END
/*
 * addpage -- Handles adding a new duplicate page onto the end of
 * an existing duplicate page.
 *
 * fileid: identifies the file being changed.
 * pgno: page number to which a new page is being added.
 * lsn: lsn of pgno.
 * nextpgno: new page number being added.
 * nextlsn: lsn of nextpgno.
 */
BEGIN addpage
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG nextpgno db_pgno_t lu
POINTER nextlsn DB_LSN * lu
END
/*
 * debug -- log an operation upon entering an access method.
 *
 * op: Operation (cursor, c_close, c_get, c_put, c_del,
 *	get, put, delete).
 * fileid: identifies the file being acted upon.
 * key: key parameter.
 * data: data parameter.
 * arg_flags: flags parameter.
 */
BEGIN debug
DBT op DBT s
ARG fileid u_int32_t lu
DBT key DBT s
DBT data DBT s
ARG arg_flags u_int32_t lu
END
/*
 * noop -- do nothing, but get an LSN.
 *
 * The record declares no fields of its own.
 */
BEGIN noop
END

1462
db2/db/db_auto.c Normal file

File diff suppressed because it is too large Load Diff

219
db2/db/db_conv.c Normal file
View File

@ -0,0 +1,219 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_conv.c 10.4 (Sleepycat) 8/15/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "db_swap.h"
#include "db_am.h"
/*
 * Shared worker behind __db_pgin() and __db_pgout(); they pass 1 and 0
 * respectively as the final (pgin) argument.
 */
static int __db_convert __P((db_pgno_t, void *, int));
/*
 * __db_pgin, __db_pgout --
 *	Page-in and page-out conversion entry points.  Both defer to
 *	__db_convert(); __db_pgin selects the page-in direction.
 *
 * PUBLIC: int __db_pgin __P((db_pgno_t, void *));
 * PUBLIC: int __db_pgout __P((db_pgno_t, void *));
 */
int
__db_pgin(pg, pp)
	db_pgno_t pg;
	void *pp;
{
	int ret;

	ret = __db_convert(pg, pp, 1);
	return (ret);
}
/* Page-out counterpart of __db_pgin(): converts with pgin == 0. */
int
__db_pgout(pg, pp)
	db_pgno_t pg;
	void *pp;
{
	int ret;

	ret = __db_convert(pg, pp, 0);
	return (ret);
}
/*
 * __db_convert --
 *	Actually convert a page.
 *
 * Applies the M_16_SWAP/M_32_SWAP (and SWAP16/SWAP32) macros to the page
 * header, the index array and the per-item fields of the page at pp.
 *
 *	pg	page number; unused.
 *	pp	the page, treated as a PAGE *.
 *	pgin	non-zero when reading a page in, zero when writing one out.
 *
 * Ordering matters.  On page-in the header and each inp[] slot are
 * swapped BEFORE they are used to locate items; on page-out they are
 * swapped AFTER the items have been located and converted, so NUM_ENT()
 * and the GET_*() macros always read the values before the out-swap.
 *
 * Returns 0, or EINVAL for an unrecognized page type.  Note that on
 * page-in the header has already been swapped when EINVAL is returned.
 */
static int
__db_convert(pg, pp, pgin)
	db_pgno_t pg;			/* Unused, but left for the future. */
	void *pp;
	int pgin;
{
	BINTERNAL *bi;
	BKEYDATA *bk;
	BOVERFLOW *bo;
	HKEYDATA *hk;
	PAGE *h;
	RINTERNAL *ri;
	db_indx_t i;
	u_int8_t *p;

	h = pp;
	if (pgin) {
		M_32_SWAP(h->lsn.file);
		M_32_SWAP(h->lsn.offset);
		M_32_SWAP(h->pgno);
		M_32_SWAP(h->prev_pgno);
		M_32_SWAP(h->next_pgno);
		M_16_SWAP(h->entries);
		M_16_SWAP(h->hf_offset);
	}

	switch (h->type) {
	case P_HASH:
		for (i = 0; i < NUM_ENT(h); i++) {
			if (pgin)
				M_16_SWAP(h->inp[i]);

			hk = GET_HKEYDATA(h, i);
			switch (hk->type) {
			case H_KEYDATA:
				break;
			case H_DUPLICATE:
			case H_OFFPAGE:
				/*
				 * Skip two bytes past the start of the item,
				 * then convert the fields in sequence.  The
				 * SWAP32/SWAP16 macros are presumably
				 * expected to advance p -- confirm in
				 * db_swap.h.
				 */
				p = (u_int8_t *)hk + sizeof(u_int8_t);
				++p;
				SWAP32(p);			/* tlen */
				SWAP32(p);			/* pgno */
				SWAP16(p);			/* offset */
				SWAP16(p);			/* len */
				break;
			}

			if (!pgin)
				M_16_SWAP(h->inp[i]);
		}
		break;
	case P_LBTREE:
	case P_LRECNO:
	case P_DUPLICATE:
		for (i = 0; i < NUM_ENT(h); i++) {
			if (pgin)
				M_16_SWAP(h->inp[i]);

			bk = GET_BKEYDATA(h, i);
			switch (bk->type) {
			case B_KEYDATA:
				M_16_SWAP(bk->len);
				break;
			case B_DUPLICATE:
			case B_OVERFLOW:
				bo = (BOVERFLOW *)bk;
				M_32_SWAP(bo->tlen);
				M_32_SWAP(bo->pgno);
				break;
			}

			if (!pgin)
				M_16_SWAP(h->inp[i]);
		}
		break;
	case P_IBTREE:
		for (i = 0; i < NUM_ENT(h); i++) {
			if (pgin)
				M_16_SWAP(h->inp[i]);

			bi = GET_BINTERNAL(h, i);
			switch (bi->type) {
			case B_KEYDATA:
				M_16_SWAP(bi->len);
				M_32_SWAP(bi->pgno);
				M_32_SWAP(bi->nrecs);
				break;
			case B_DUPLICATE:
			case B_OVERFLOW:
				/*
				 * NOTE(review): unlike B_KEYDATA, this path
				 * does not swap bi->len/pgno/nrecs, and it
				 * overlays BOVERFLOW on the BINTERNAL itself.
				 * Confirm against the struct layouts in
				 * db_page.h that this is intended.
				 */
				bo = (BOVERFLOW *)bi;
				M_32_SWAP(bo->tlen);
				M_32_SWAP(bo->pgno);
				break;
			}

			if (!pgin)
				M_16_SWAP(h->inp[i]);
		}
		break;
	case P_IRECNO:
		for (i = 0; i < NUM_ENT(h); i++) {
			if (pgin)
				M_16_SWAP(h->inp[i]);

			ri = GET_RINTERNAL(h, i);
			M_32_SWAP(ri->pgno);
			M_32_SWAP(ri->nrecs);

			if (!pgin)
				M_16_SWAP(h->inp[i]);
		}
		/* Explicit break: do not rely on falling into the next case. */
		break;
	case P_OVERFLOW:
	case P_INVALID:
		/* Nothing to do. */
		break;
	default:
		return (EINVAL);
	}

	if (!pgin) {
		/* Swap the header information. */
		M_32_SWAP(h->lsn.file);
		M_32_SWAP(h->lsn.offset);
		M_32_SWAP(h->pgno);
		M_32_SWAP(h->prev_pgno);
		M_32_SWAP(h->next_pgno);
		M_16_SWAP(h->entries);
		M_16_SWAP(h->hf_offset);
	}
	return (0);
}

270
db2/db/db_dispatch.c Normal file
View File

@ -0,0 +1,270 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
* The President and Fellows of Harvard University. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_dispatch.c 10.5 (Sleepycat) 7/2/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "db_dispatch.h"
#include "db_am.h"
#include "common_ext.h"
/*
* Data structures to manage the DB dispatch table. The dispatch table
* is a dynamically allocated array of pointers to dispatch functions,
* indexed by log record type. dispatch_size is the number of entries
* allocated in the current dispatch table; entries that have not been
* registered through __db_add_recovery are NULL.
*/
static int (**dispatch_table) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
static u_int32_t dispatch_size = 0;
/*
 * __db_dispatch --
 *
 * This is the transaction dispatch function used by the db access methods.
 * It is designed to handle the record format used by all the access
 * methods (the one automatically generated by the db_{h,log,read}.sh
 * scripts in the tools directory). An application using a different
 * recovery paradigm will supply a different dispatch function to txn_open.
 *
 * The record begins with a u_int32_t record type immediately followed by
 * a u_int32_t transaction id.  The record type selects the recovery
 * routine from dispatch_table; the "redo" argument selects whether, and
 * in which mode, that routine is invoked.  Returns the recovery routine's
 * result, or 0 when the record is skipped.  An unknown "redo" value
 * aborts the process.
 *
 * PUBLIC: int __db_dispatch __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__db_dispatch(logp, db, lsnp, redo, info)
	DB_LOG *logp;		/* The log file. */
	DBT *db;		/* The log record upon which to dispatch. */
	DB_LSN *lsnp;		/* The lsn of the record being dispatched. */
	int redo;		/* Redo this op (or undo it). */
	void *info;
{
	u_int32_t rectype, txnid;
	u_int8_t *rec;
	int op;

	/* Pull the record type and transaction id off the record header. */
	rec = db->data;
	memcpy(&rectype, rec, sizeof(rectype));
	memcpy(&txnid, rec + sizeof(rectype), sizeof(txnid));

	/*
	 * Decide which mode to hand to the recovery routine; any case that
	 * decides the record must be skipped returns 0 right away.
	 */
	op = redo;
	switch (redo) {
	case TXN_REDO:
	case TXN_UNDO:
		/* Explicit request: pass the mode straight through. */
		break;
	case TXN_OPENFILES:
		/* Only record types below DB_txn_BEGIN are dispatched. */
		if (rectype >= DB_txn_BEGIN)
			return (0);
		break;
	case TXN_BACKWARD_ROLL:
		/*
		 * Running full recovery in the backward pass.  If we've
		 * seen this txnid before and added it to our commit list,
		 * then we do nothing during this pass.  If we've never
		 * seen it, then we call the appropriate recovery routine
		 * in "abort mode".
		 */
		if (__db_txnlist_find(info, txnid) != DB_NOTFOUND)
			return (0);
		op = TXN_UNDO;
		break;
	case TXN_FORWARD_ROLL:
		/*
		 * In the forward pass, if we haven't seen the transaction,
		 * do nothing, else recover it.
		 */
		if (__db_txnlist_find(info, txnid) == DB_NOTFOUND)
			return (0);
		op = TXN_REDO;
		break;
	default:
		abort();
	}

	return ((dispatch_table[rectype])(logp, db, lsnp, op, info));
}
/*
* __db_add_recovery --
*
* PUBLIC: int __db_add_recovery __P((DB_ENV *,
* PUBLIC: int (*)(DB_LOG *, DBT *, DB_LSN *, int, void *), u_int32_t));
*/
int
__db_add_recovery(dbenv, func, ndx)
DB_ENV *dbenv;
int (*func) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
u_int32_t ndx;
{
u_int32_t i;
/* Check if function is already registered. */
if (dispatch_table && ndx < dispatch_size &&
dispatch_table[ndx] != 0 && dispatch_table[ndx] != func)
return (DB_REGISTERED);
/* Check if we have to grow the table. */
if (ndx >= dispatch_size) {
if (dispatch_table == NULL)
dispatch_table = (int (**)
__P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
malloc(DB_user_BEGIN * sizeof(dispatch_table[0]));
else
dispatch_table = (int (**)
__P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
realloc(dispatch_table, (DB_user_BEGIN +
dispatch_size) * sizeof(dispatch_table[0]));
if (dispatch_table == NULL) {
__db_err(dbenv, "%s", strerror(ENOMEM));
return (ENOMEM);
}
for (i = dispatch_size,
dispatch_size += DB_user_BEGIN; i < dispatch_size; ++i)
dispatch_table[i] = NULL;
}
dispatch_table[ndx] = func;
return (0);
}
/*
* __db_txnlist_init --
* Initialize transaction linked list.
*
* PUBLIC: int __db_txnlist_init __P((void *));
*/
int
__db_txnlist_init(retp)
void *retp;
{
__db_txnhead *headp;
if ((headp =
(struct __db_txnhead *)malloc(sizeof(struct __db_txnhead))) == NULL)
return (ENOMEM);
LIST_INIT(&headp->head);
headp->maxid = 0;
*(void **)retp = headp;
return (0);
}
/*
* __db_txnlist_add --
* Add an element to our transaction linked list.
*
* PUBLIC: int __db_txnlist_add __P((void *, u_int32_t));
*/
int
__db_txnlist_add(listp, txnid)
void *listp;
u_int32_t txnid;
{
__db_txnhead *hp;
__db_txnlist *elp;
if ((elp = (__db_txnlist *)malloc(sizeof(__db_txnlist))) == NULL)
return (ENOMEM);
elp->txnid = txnid;
hp = (struct __db_txnhead *)listp;
LIST_INSERT_HEAD(&hp->head, elp, links);
if (txnid > hp->maxid)
hp->maxid = txnid;
return (0);
}
/*
* __db_txnlist_find --
* Checks to see if txnid is in the txnid list, returns 1 if found,
* 0 if not found.
*
* PUBLIC: int __db_txnlist_find __P((void *, u_int32_t));
*/
int
__db_txnlist_find(listp, txnid)
void *listp;
u_int32_t txnid;
{
__db_txnlist *p;
__db_txnhead *hp;
if ((hp = (struct __db_txnhead *)listp) == NULL)
return (DB_NOTFOUND);
if (hp->maxid < txnid) {
hp->maxid = txnid;
return (DB_NOTFOUND);
}
for (p = hp->head.lh_first; p != NULL; p = p->links.le_next)
if (p->txnid == txnid)
return (0);
return (DB_NOTFOUND);
}
#ifdef DEBUG
void
__db_txnlist_print(listp)
void *listp;
{
__db_txnlist *p;
__db_txnhead *hp;
hp = (struct __db_txnhead *)listp;
printf("Maxid: %lu\n", (u_long)hp->maxid);
for (p = hp->head.lh_first; p != NULL; p = p->links.le_next)
printf("TXNID: %lu\n", (u_long)p->txnid);
}
#endif

680
db2/db/db_dup.c Normal file
View File

@ -0,0 +1,680 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_dup.c 10.8 (Sleepycat) 7/20/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "db_swap.h"
#include "btree.h"
#include "db_am.h"
#include "common_ext.h"
static int __db_addpage __P((DB *,
PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
static int __db_dsplit __P((DB *,
PAGE **, db_indx_t *, u_int32_t, int (*)(DB *, u_int32_t, PAGE **)));
/*
 * __db_dput --
 *	Put a duplicate item onto a duplicate page at the given index.
 *
 *	Items larger than a quarter of the page size are written to an
 *	overflow chain with __db_poff and only a BOVERFLOW reference is
 *	placed on the page.  When the item does not fit in the free space
 *	of *pp, either a new page is appended (the insert is at the end of
 *	the last page in the chain) or the page is split; in both cases *pp
 *	and *indxp are updated to the page and index of the insert.
 *
 * PUBLIC: int __db_dput __P((DB *,
 * PUBLIC:    DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
 */
int
__db_dput(dbp, dbt, pp, indxp, newfunc)
	DB *dbp;
	DBT *dbt;
	PAGE **pp;
	db_indx_t *indxp;
	int (*newfunc) __P((DB *, u_int32_t, PAGE **));
{
	BOVERFLOW bo;
	DBT *data_dbtp, hdr_dbt, *hdr_dbtp;
	PAGE *pagep;
	db_indx_t size, isize;
	db_pgno_t pgno;
	int ret;

	/*
	 * We need some access method independent threshold for when we put
	 * a duplicate item onto an overflow page.
	 */
	if (dbt->size > 0.25 * dbp->pgsize) {
		/* Big item: store it off-page, keep a reference on-page. */
		ret = __db_poff(dbp, dbt, &pgno, newfunc);
		if (ret != 0)
			return (ret);

		bo.deleted = 0;
		bo.type = B_OVERFLOW;
		bo.tlen = dbt->size;
		bo.pgno = pgno;

		isize = BOVERFLOW_SIZE;
		hdr_dbt.data = &bo;
		hdr_dbt.size = isize;
		hdr_dbtp = &hdr_dbt;
		data_dbtp = NULL;
		size = BOVERFLOW_PSIZE;
	} else {
		/* Small item: __db_pitem builds the on-page header itself. */
		hdr_dbtp = NULL;
		data_dbtp = dbt;
		isize = BKEYDATA_SIZE(dbt->size);
		size = BKEYDATA_PSIZE(dbt->size);
	}

	pagep = *pp;
	if (size > P_FREESPACE(pagep)) {
		/* No room: extend the chain or split the current page. */
		if (*indxp == NUM_ENT(pagep) &&
		    NEXT_PGNO(pagep) == PGNO_INVALID)
			ret = __db_addpage(dbp, pp, indxp, newfunc);
		else
			ret = __db_dsplit(dbp, pp, indxp, isize, newfunc);
		if (ret != 0)
			/* XXX: Pages not returned to free list. */
			return (ret);
		pagep = *pp;
	}

	/*
	 * Now, pagep references the page on which to insert and indx is
	 * the location to insert.
	 */
	ret = __db_pitem(dbp,
	    pagep, (u_int32_t)*indxp, isize, hdr_dbtp, data_dbtp);
	if (ret != 0)
		return (ret);

	(void)memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
	return (0);
}
/*
* __db_drem --
* Remove a duplicate at the given index on the given page.
*
* PUBLIC: int __db_drem __P((DB *,
* PUBLIC: PAGE **, u_int32_t, int (*)(DB *, PAGE *)));
*/
int
__db_drem(dbp, pp, indx, freefunc)
DB *dbp;
PAGE **pp;
u_int32_t indx;
int (*freefunc) __P((DB *, PAGE *));
{
PAGE *pagep;
int ret;
pagep = *pp;
/* Check if we are freeing a big item. */
if (GET_BKEYDATA(pagep, indx)->type == B_OVERFLOW) {
if ((ret = __db_doff(dbp,
GET_BOVERFLOW(pagep, indx)->pgno, freefunc)) != 0)
return (ret);
ret = __db_ditem(dbp, pagep, indx, BOVERFLOW_SIZE);
} else
ret = __db_ditem(dbp, pagep, indx,
BKEYDATA_SIZE(GET_BKEYDATA(pagep, indx)->len));
if (ret != 0)
return (ret);
if (NUM_ENT(pagep) == 0) {
/*
* If the page is emptied, then the page is freed and the pp
* parameter is set to reference the next, locked page in the
* duplicate chain, if one exists. If there was no such page,
* then it is set to NULL.
*
* !!!
* __db_relink will set the dirty bit for us.
*/
if ((ret = __db_relink(dbp, pagep, pp, 0)) != 0)
return (ret);
if ((ret = freefunc(dbp, pagep)) != 0)
return (ret);
} else
(void)memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
return (0);
}
/*
 * __db_dend --
 *	Find the last page in a set of offpage duplicates.
 *
 *	This implements DB_KEYLAST.  pgno should be the page number of the
 *	first page of the duplicate chain; the last page is returned,
 *	still held, through pagep.  Intermediate pages are put back as the
 *	chain is walked.
 *
 * PUBLIC: int __db_dend __P((DB *, db_pgno_t, PAGE **));
 */
int
__db_dend(dbp, pgno, pagep)
	DB *dbp;
	db_pgno_t pgno;
	PAGE **pagep;
{
	PAGE *h;
	int ret;

	do {
		ret = memp_fget(dbp->mpf, &pgno, 0, &h);
		if (ret != 0) {
			(void)__db_pgerr(dbp, pgno);
			return (ret);
		}

		/* Release every page except the final one. */
		pgno = NEXT_PGNO(h);
		if (pgno != PGNO_INVALID)
			(void)memp_fput(dbp->mpf, h, 0);
	} while (pgno != PGNO_INVALID);

	*pagep = h;
	return (0);
}
/*
* __db_dsplit --
* Split a page of duplicates, calculating the split point based
* on an element of size "size" being added at "*indxp".
* On entry hp contains a pointer to the page-pointer of the original
* page. On exit, it returns a pointer to the page containing "*indxp"
* and "indxp" has been modified to reflect the index on the new page
* where the element should be added. The function returns with
* the page on which the insert should happen, not yet put.
*
* The page that does not receive the insert is put back to the memory
* pool marked dirty before returning. Returns 0 on success, ENOMEM if
* the scratch page cannot be allocated, or the error returned by
* newfunc, __db_split_log or memp_fput.
*/
static int
__db_dsplit(dbp, hp, indxp, size, newfunc)
DB *dbp;
PAGE **hp;
db_indx_t *indxp;
u_int32_t size;
int (*newfunc) __P((DB *, u_int32_t, PAGE **));
{
PAGE *h, *np, *tp;
BKEYDATA *bk;
DBT page_dbt;
db_indx_t indx, nindex, oindex, sum;
db_indx_t halfbytes, i, lastsum;
int did_indx, ret, s;
h = *hp;
indx = *indxp;
/* Create a temporary page to do compaction onto. */
if ((tp = (PAGE *)malloc(dbp->pgsize)) == NULL)
return (ENOMEM);
#ifdef DEBUG
memset(tp, 0xff, dbp->pgsize);
#endif
/* Create new page for the split. */
if ((ret = newfunc(dbp, P_DUPLICATE, &np)) != 0) {
FREE(tp, dbp->pgsize);
return (ret);
}
/*
* np is linked into the chain directly after h. tp is a scratch image
* of h: it keeps h's page number and previous link and points forward
* at np.
*/
P_INIT(np, dbp->pgsize, PGNO(np), PGNO(h), NEXT_PGNO(h), 0,
P_DUPLICATE);
P_INIT(tp, dbp->pgsize, PGNO(h), PREV_PGNO(h), PGNO(np), 0,
P_DUPLICATE);
/*
* Figure out the split point: walk the items on h, accumulating their
* sizes (counting the item to be inserted when its index is reached),
* and stop once the running total crosses half of the bytes in use.
*/
halfbytes = (dbp->pgsize - HOFFSET(h)) / 2;
did_indx = 0;
for (sum = 0, lastsum = 0, i = 0; i < NUM_ENT(h); i++) {
if (i == indx) {
sum += size;
if (lastsum < halfbytes && sum >= halfbytes) {
/* We've crossed the halfway point. */
if ((db_indx_t)(halfbytes - lastsum) <
(db_indx_t)(sum - halfbytes)) {
/* The new item goes first on the new page. */
*hp = np;
*indxp = 0;
/*
* NOTE(review): i is a db_indx_t (presumably unsigned); if this is
* reached with i == 0 the decrement wraps -- confirm it cannot happen.
*/
i--;
} else
*indxp = i;
break;
}
*indxp = i;
lastsum = sum;
did_indx = 1;
}
if (GET_BKEYDATA(h, i)->type == B_KEYDATA)
sum += BKEYDATA_SIZE(GET_BKEYDATA(h, i)->len);
else
sum += BOVERFLOW_SIZE;
if (lastsum < halfbytes && sum >= halfbytes) {
/* We've crossed the halfway point. */
if ((db_indx_t)(halfbytes - lastsum) <
(db_indx_t)(sum - halfbytes))
i--;
break;
}
}
/*
* Check if we have set the return values of the index pointer and
* page pointer.
*/
if (!did_indx) {
*hp = np;
*indxp = indx - i - 1;
}
/* Log the full pre-split image of h. */
if (DB_LOGGING(dbp)) {
page_dbt.size = dbp->pgsize;
page_dbt.data = h;
if ((ret = __db_split_log(dbp->dbenv->lg_info,
dbp->txn, &LSN(h), 0, DB_SPLITOLD, dbp->log_fileid,
PGNO(h), &page_dbt, &LSN(h))) != 0) {
/*
* NOTE(review): np, obtained from newfunc above, is not released on
* this error path -- confirm whether the caller reclaims it.
*/
FREE(tp, dbp->pgsize);
return (ret);
}
LSN(tp) = LSN(h);
}
/*
* If it's a btree, adjust the cursors.
*
* i is the index of the last element to stay on the page.
*/
if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
__bam_ca_split(dbp, PGNO(h), PGNO(h), PGNO(np), i + 1, 0);
/* Copy the items after the split point onto the new page. */
for (nindex = 0, oindex = i + 1; oindex < NUM_ENT(h); oindex++) {
bk = GET_BKEYDATA(h, oindex);
if (bk->type == B_KEYDATA)
s = BKEYDATA_SIZE(bk->len);
else
s = BOVERFLOW_SIZE;
np->inp[nindex++] = HOFFSET(np) -= s;
memcpy((u_int8_t *)np + HOFFSET(np), bk, s);
NUM_ENT(np)++;
}
/*
* Now do data compaction by copying the remaining stuff onto the
* temporary page and then copying it back to the real page.
*/
for (nindex = 0, oindex = 0; oindex <= i; oindex++) {
bk = GET_BKEYDATA(h, oindex);
if (bk->type == B_KEYDATA)
s = BKEYDATA_SIZE(bk->len);
else
s = BOVERFLOW_SIZE;
tp->inp[nindex++] = HOFFSET(tp) -= s;
memcpy((u_int8_t *)tp + HOFFSET(tp), bk, s);
NUM_ENT(tp)++;
}
/*
* This page (the temporary) should be only half full, so we do two
* memcpy's, one for the top of the page and one for the bottom of
* the page. This way we avoid copying the middle which should be
* about half a page.
*/
memcpy(h, tp, LOFFSET(tp));
memcpy((u_int8_t *)h + HOFFSET(tp),
(u_int8_t *)tp + HOFFSET(tp), dbp->pgsize - HOFFSET(tp));
FREE(tp, dbp->pgsize);
/* Log the post-split images of both pages. */
if (DB_LOGGING(dbp)) {
page_dbt.size = dbp->pgsize;
page_dbt.data = h;
if ((ret = __db_split_log(dbp->dbenv->lg_info,
dbp->txn, &LSN(h), 0, DB_SPLITNEW, dbp->log_fileid,
PGNO(h), &page_dbt, &LSN(h))) != 0)
return (ret);
page_dbt.size = dbp->pgsize;
page_dbt.data = np;
if ((ret = __db_split_log(dbp->dbenv->lg_info,
dbp->txn, &LSN(np), 0, DB_SPLITNEW, dbp->log_fileid,
PGNO(np), &page_dbt, &LSN(np))) != 0)
return (ret);
}
/*
* Figure out if the location we're interested in is on the new
* page, and if so, reset the callers' pointer. Push the other
* page back to the store.
*/
if (*hp == h)
ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY);
else
ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
return (ret);
}
/*
* __db_ditem --
* Remove an item from a page.
*
* indx is the slot of the item in the page's index array and nbytes
* is the number of bytes the item occupies on the page. When logging
* is enabled the removal is logged (DB_REM_DUP) before the page is
* changed. Returns 0 on success or the error from __db_addrem_log.
*
* PUBLIC: int __db_ditem __P((DB *, PAGE *, int, u_int32_t));
*/
int
__db_ditem(dbp, pagep, indx, nbytes)
DB *dbp;
PAGE *pagep;
int indx;
u_int32_t nbytes;
{
DBT ldbt;
db_indx_t cnt, offset;
int ret;
u_int8_t *from;
if (DB_LOGGING(dbp)) {
/* The log record carries a copy of the bytes being removed. */
ldbt.data = P_ENTRY(pagep, indx);
ldbt.size = nbytes;
if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbp->txn,
&LSN(pagep), 0, DB_REM_DUP, dbp->log_fileid, PGNO(pagep),
(u_int32_t)indx, nbytes, &ldbt, NULL, &LSN(pagep))) != 0)
return (ret);
}
/*
* If there's only a single item on the page, we don't have to
* work hard.
*
* NOTE(review): this path returns before the btree cursor adjustment
* at the end of the function -- confirm that is intended.
*/
if (NUM_ENT(pagep) == 1) {
NUM_ENT(pagep) = 0;
HOFFSET(pagep) = dbp->pgsize;
return (0);
}
/*
* Pack the remaining key/data items at the end of the page. Use
* memmove(3), the regions may overlap.
*
* The bytes between the start of the data area (HOFFSET) and the
* removed item are slid up by nbytes, over the removed item.
*/
from = (u_int8_t *)pagep + HOFFSET(pagep);
memmove(from + nbytes, from, pagep->inp[indx] - HOFFSET(pagep));
HOFFSET(pagep) += nbytes;
/*
* Adjust the indices' offsets: every item stored below the removed
* item's offset has just moved up by nbytes.
*/
offset = pagep->inp[indx];
for (cnt = 0; cnt < NUM_ENT(pagep); ++cnt)
if (pagep->inp[cnt] < offset)
pagep->inp[cnt] += nbytes;
/* Shift the indices down. */
--NUM_ENT(pagep);
if (indx != NUM_ENT(pagep))
memmove(&pagep->inp[indx], &pagep->inp[indx + 1],
sizeof(db_indx_t) * (NUM_ENT(pagep) - indx));
/* If it's a btree, adjust the cursors. */
if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
__bam_ca_di(dbp, PGNO(pagep), indx, -1);
return (0);
}
/*
* __db_pitem --
* Put an item on a page.
*
* indx is the slot in the page's index array at which the item is
* inserted and nbytes is the total number of bytes it occupies on the
* page. hdr and data are copied onto the page back to back; either
* may be NULL (see below). Returns 0 on success or the error from
* __db_addrem_log.
*
* PUBLIC: int __db_pitem
* PUBLIC: __P((DB *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
*/
int
__db_pitem(dbp, pagep, indx, nbytes, hdr, data)
DB *dbp;
PAGE *pagep;
u_int32_t indx;
u_int32_t nbytes;
DBT *hdr, *data;
{
BKEYDATA bk;
DBT thdr;
int ret;
u_int8_t *p;
/*
* Put a single item onto a page. The logic figuring out where to
* insert and whether it fits is handled in the caller. All we do
* here is manage the page shuffling. We cheat a little bit in that
* we don't want to copy the dbt on a normal put twice. If hdr is
* NULL, we create a BKEYDATA structure on the page, otherwise, just
* copy the caller's information onto the page.
*
* This routine is also used to put entries onto the page where the
* entry is pre-built, e.g., during recovery. In this case, the hdr
* will point to the entry, and the data argument will be NULL.
*
* !!!
* There's a tremendous potential for off-by-one errors here, since
* the passed in header sizes must be adjusted for the structure's
* placeholder for the trailing variable-length data field.
*/
if (DB_LOGGING(dbp))
if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbp->txn,
&LSN(pagep), 0, DB_ADD_DUP, dbp->log_fileid, PGNO(pagep),
(u_int32_t)indx, nbytes, hdr, data, &LSN(pagep))) != 0)
return (ret);
if (hdr == NULL) {
/*
* Build the BKEYDATA header locally. Only the part of the structure
* that precedes its data field is copied to the page (presumably
* SSZA yields the offset of that field -- confirm against db_int.h).
*/
bk.deleted = 0;
bk.type = B_KEYDATA;
bk.len = data == NULL ? 0 : data->size;
thdr.data = &bk;
thdr.size = SSZA(BKEYDATA, data);
hdr = &thdr;
}
/* Adjust the index table, then put the item on the page. */
if (indx != NUM_ENT(pagep))
memmove(&pagep->inp[indx + 1], &pagep->inp[indx],
sizeof(db_indx_t) * (NUM_ENT(pagep) - indx));
/* The item is placed at the new start of the data area. */
HOFFSET(pagep) -= nbytes;
pagep->inp[indx] = HOFFSET(pagep);
++NUM_ENT(pagep);
p = P_ENTRY(pagep, indx);
memcpy(p, hdr->data, hdr->size);
if (data != NULL)
memcpy(p + hdr->size, data->data, data->size);
/* If it's a btree, adjust the cursors. */
if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
__bam_ca_di(dbp, PGNO(pagep), indx, 1);
return (0);
}
/*
 * __db_relink --
 *	Relink around a deleted page.
 *
 *	The pages before and after pagep in its chain are fetched (and
 *	write-locked when needlock is set), the change is logged when
 *	logging is enabled, and their prev/next links are rewritten to
 *	skip pagep.  If new_next is non-NULL it receives the page that
 *	followed pagep, still held, or NULL if there was none; otherwise
 *	the following page is put back here.  Returns 0 on success or the
 *	first error encountered.
 *
 * PUBLIC: int __db_relink __P((DB *, PAGE *, PAGE **, int));
 */
int
__db_relink(dbp, pagep, new_next, needlock)
	DB *dbp;
	PAGE *pagep, **new_next;
	int needlock;
{
	PAGE *np, *pp;
	DB_LOCK npl, ppl;
	DB_LSN *nlsnp, *plsnp;
	int ret;

	ret = 0;
	np = pp = NULL;
	npl = ppl = LOCK_INVALID;
	nlsnp = plsnp = NULL;

	/* Retrieve and lock the two pages. */
	if (pagep->next_pgno != PGNO_INVALID) {
		if (needlock && (ret = __bam_lget(dbp,
		    0, pagep->next_pgno, DB_LOCK_WRITE, &npl)) != 0)
			goto err;
		if ((ret = memp_fget(dbp->mpf,
		    &pagep->next_pgno, 0, &np)) != 0) {
			(void)__db_pgerr(dbp, pagep->next_pgno);
			goto err;
		}
		nlsnp = &np->lsn;
	}
	if (pagep->prev_pgno != PGNO_INVALID) {
		if (needlock && (ret = __bam_lget(dbp,
		    0, pagep->prev_pgno, DB_LOCK_WRITE, &ppl)) != 0)
			goto err;
		if ((ret = memp_fget(dbp->mpf,
		    &pagep->prev_pgno, 0, &pp)) != 0) {
			/* Report the page we actually failed to fetch. */
			(void)__db_pgerr(dbp, pagep->prev_pgno);
			goto err;
		}
		plsnp = &pp->lsn;
	}

	/* Log the change. */
	if (DB_LOGGING(dbp)) {
		if ((ret = __db_relink_log(dbp->dbenv->lg_info, dbp->txn,
		    &pagep->lsn, 0, dbp->log_fileid, pagep->pgno, &pagep->lsn,
		    pagep->prev_pgno, plsnp, pagep->next_pgno, nlsnp)) != 0)
			goto err;
		if (np != NULL)
			np->lsn = pagep->lsn;
		if (pp != NULL)
			pp->lsn = pagep->lsn;
	}

	/*
	 * Modify and release the two pages.
	 *
	 * !!!
	 * The parameter new_next gets set to the page following the page we
	 * are removing.  If there is no following page, then new_next gets
	 * set to NULL.
	 */
	if (np != NULL) {
		np->prev_pgno = pagep->prev_pgno;
		if (new_next == NULL)
			ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY);
		else {
			*new_next = np;
			ret = memp_fset(dbp->mpf, np, DB_MPOOL_DIRTY);
		}
		if (ret != 0)
			goto err;

		/*
		 * Once the page has been put and/or its lock released,
		 * forget them so that a failure on the previous page below
		 * does not release them a second time in the error path.
		 */
		if (new_next == NULL)
			np = NULL;
		if (needlock) {
			(void)__bam_lput(dbp, npl);
			npl = LOCK_INVALID;
		}
	} else if (new_next != NULL)
		*new_next = NULL;

	if (pp != NULL) {
		pp->next_pgno = pagep->next_pgno;
		if ((ret = memp_fput(dbp->mpf, pp, DB_MPOOL_DIRTY)) != 0)
			goto err;
		if (needlock)
			(void)__bam_lput(dbp, ppl);
	}
	return (0);

err:	if (np != NULL)
		(void)memp_fput(dbp->mpf, np, 0);
	if (needlock && npl != LOCK_INVALID)
		(void)__bam_lput(dbp, npl);
	if (pp != NULL)
		(void)memp_fput(dbp->mpf, pp, 0);
	if (needlock && ppl != LOCK_INVALID)
		(void)__bam_lput(dbp, ppl);
	return (ret);
}
/*
 * __db_ddup --
 *	Delete an offpage chain of duplicates.
 *
 *	Starting at pgno, each page in the chain is fetched, its full
 *	image is logged (DB_SPLITOLD) when logging is enabled, and it is
 *	handed to freefunc.  Returns 0 on success or the first error from
 *	memp_fget, __db_split_log or freefunc.
 *
 * PUBLIC: int __db_ddup __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
 */
int
__db_ddup(dbp, pgno, freefunc)
	DB *dbp;
	db_pgno_t pgno;
	int (*freefunc) __P((DB *, PAGE *));
{
	PAGE *pagep;
	DBT tmp_dbt;
	int ret;

	for (;;) {
		ret = memp_fget(dbp->mpf, &pgno, 0, &pagep);
		if (ret != 0) {
			(void)__db_pgerr(dbp, pgno);
			return (ret);
		}

		if (DB_LOGGING(dbp)) {
			tmp_dbt.data = pagep;
			tmp_dbt.size = dbp->pgsize;
			ret = __db_split_log(dbp->dbenv->lg_info, dbp->txn,
			    &LSN(pagep), 0, DB_SPLITOLD, dbp->log_fileid,
			    PGNO(pagep), &tmp_dbt, &LSN(pagep));
			if (ret != 0)
				return (ret);
		}

		/* Remember the successor before the page goes away. */
		pgno = pagep->next_pgno;
		ret = freefunc(dbp, pagep);
		if (ret != 0)
			return (ret);

		if (pgno == PGNO_INVALID)
			break;
	}
	return (0);
}
/*
 * __db_addpage --
 *	Create a new page and link it onto the next_pgno field of the
 *	current page.
 *
 *	On success the current page is put back dirty, *hp is set to the
 *	new page and *indxp to 0.  Returns 0 on success or the error from
 *	newfunc, __db_addpage_log or memp_fput.
 */
static int
__db_addpage(dbp, hp, indxp, newfunc)
	DB *dbp;
	PAGE **hp;
	db_indx_t *indxp;
	int (*newfunc) __P((DB *, u_int32_t, PAGE **));
{
	PAGE *curp, *newpage;
	int ret;

	curp = *hp;

	ret = newfunc(dbp, P_DUPLICATE, &newpage);
	if (ret != 0)
		return (ret);

	if (DB_LOGGING(dbp)) {
		ret = __db_addpage_log(dbp->dbenv->lg_info,
		    dbp->txn, &LSN(curp), 0, dbp->log_fileid,
		    PGNO(curp), &LSN(curp), PGNO(newpage), &LSN(newpage));
		if (ret != 0)
			return (ret);
		LSN(newpage) = LSN(curp);
	}

	/* Chain the two pages together. */
	PREV_PGNO(newpage) = PGNO(curp);
	NEXT_PGNO(curp) = PGNO(newpage);

	ret = memp_fput(dbp->mpf, curp, DB_MPOOL_DIRTY);
	if (ret != 0)
		return (ret);

	*hp = newpage;
	*indxp = 0;
	return (0);
}

383
db2/db/db_overflow.c Normal file
View File

@ -0,0 +1,383 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_overflow.c 10.4 (Sleepycat) 7/2/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "db_am.h"
#include "common_ext.h"
/*
* Big key/data code.
*
* Big key and data entries are stored on linked lists of pages. The initial
* reference is a structure with the total length of the item and the page
* number where it begins. Each entry in the linked list contains a pointer
* to the next page of data, and so on.
*/
/*
* __db_goff --
* Get an offpage item.
*
* PUBLIC: int __db_goff __P((DB *, DBT *,
* PUBLIC: u_int32_t, db_pgno_t, void **, u_int32_t *));
*/
int
__db_goff(dbp, dbt, tlen, pgno, bpp, bpsz)
DB *dbp;
DBT *dbt;
u_int32_t tlen;
db_pgno_t pgno;
void **bpp;
u_int32_t *bpsz;
{
PAGE *h;
db_indx_t bytes;
int ret;
u_int32_t curoff, needed, start;
u_int8_t *p, *src;
/*
* Check if the buffer is big enough; if it is not and we are
* allowed to malloc space, then we'll malloc it. If we are
* not (DB_DBT_USERMEM), then we'll set the dbt and return
* appropriately.
*/
if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
start = dbt->doff;
needed = dbt->dlen;
} else {
start = 0;
needed = tlen;
}
/*
* Allocate any necessary memory.
*
* XXX: Never allocate 0 bytes;
*/
if (F_ISSET(dbt, DB_DBT_USERMEM)) {
if (needed > dbt->ulen) {
dbt->size = needed;
return (ENOMEM);
}
} else if (F_ISSET(dbt, DB_DBT_MALLOC)) {
dbt->data = dbp->db_malloc == NULL ?
(void *)malloc(needed + 1) :
(void *)dbp->db_malloc(needed + 1);
if (dbt->data == NULL)
return (ENOMEM);
} else if (*bpsz == 0 || *bpsz < needed) {
*bpp = (*bpp == NULL ?
(void *)malloc(needed + 1) :
(void *)realloc(*bpp, needed + 1));
if (*bpp == NULL)
return (ENOMEM);
*bpsz = needed + 1;
dbt->data = *bpp;
} else
dbt->data = *bpp;
/*
* Step through the linked list of pages, copying the data on each
* one into the buffer. Never copy more than the total data length.
*/
dbt->size = needed;
for (curoff = 0, p = dbt->data; pgno != P_INVALID && needed > 0;) {
if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
(void)__db_pgerr(dbp, pgno);
return (ret);
}
/* Check if we need any bytes from this page. */
if (curoff + OV_LEN(h) >= start) {
src = (u_int8_t *)h + P_OVERHEAD;
bytes = OV_LEN(h);
if (start > curoff) {
src += start - curoff;
bytes -= start - curoff;
}
if (bytes > needed)
bytes = needed;
memcpy(p, src, bytes);
p += bytes;
needed -= bytes;
}
curoff += OV_LEN(h);
pgno = h->next_pgno;
memp_fput(dbp->mpf, h, 0);
}
return (0);
}
/*
 * __db_poff --
 *	Put an offpage item.
 *
 *	The item in dbt is spread over a chain of newly allocated overflow
 *	pages obtained from newfunc; the page number of the first page is
 *	returned through pgnop.  Each page is logged (DB_ADD_BIG) when
 *	logging is enabled.  Returns 0 on success or the error from newfunc
 *	or __db_big_log.
 *
 * PUBLIC: int __db_poff __P((DB *, const DBT *, db_pgno_t *,
 * PUBLIC:     int (*)(DB *, u_int32_t, PAGE **)));
 */
int
__db_poff(dbp, dbt, pgnop, newfunc)
	DB *dbp;
	const DBT *dbt;
	db_pgno_t *pgnop;
	int (*newfunc) __P((DB *, u_int32_t, PAGE **));
{
	PAGE *pagep, *lastp;
	DB_LSN new_lsn, null_lsn;
	DBT tmp_dbt;
	db_indx_t pagespace;
	u_int32_t sz;
	u_int8_t *p;
	int ret;

	/*
	 * Allocate pages and copy the key/data item into them.  Calculate the
	 * number of bytes we get for pages we fill completely with a single
	 * item.
	 */
	pagespace = P_MAXSPACE(dbp->pgsize);

	lastp = NULL;
	p = dbt->data;
	sz = dbt->size;
	while (sz > 0) {
		/*
		 * Reduce pagespace so we terminate the loop correctly and
		 * don't copy too much data.
		 */
		if (sz < pagespace)
			pagespace = sz;

		/*
		 * Allocate and initialize a new page and copy all or part of
		 * the item onto the page.  If sz is less than pagespace, we
		 * have a partial record.
		 */
		ret = newfunc(dbp, P_OVERFLOW, &pagep);
		if (ret != 0)
			return (ret);

		if (DB_LOGGING(dbp)) {
			tmp_dbt.data = p;
			tmp_dbt.size = pagespace;
			ZERO_LSN(null_lsn);
			ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn,
			    &new_lsn, 0, DB_ADD_BIG, dbp->log_fileid,
			    PGNO(pagep),
			    lastp == NULL ? PGNO_INVALID : PGNO(lastp),
			    PGNO_INVALID, &tmp_dbt, &LSN(pagep),
			    lastp == NULL ? &null_lsn : &LSN(lastp),
			    &null_lsn);
			if (ret != 0)
				return (ret);

			/* Move lsn onto page. */
			if (lastp != NULL)
				LSN(lastp) = new_lsn;
			LSN(pagep) = new_lsn;
		}

		P_INIT(pagep, dbp->pgsize,
		    PGNO(pagep), PGNO_INVALID, PGNO_INVALID, 0, P_OVERFLOW);
		OV_LEN(pagep) = pagespace;
		OV_REF(pagep) = 1;
		memcpy((u_int8_t *)pagep + P_OVERHEAD, p, pagespace);

		/*
		 * If this is the first entry, update the user's info.
		 * Otherwise, update the entry on the last page filled
		 * in and release that page.
		 */
		if (lastp != NULL) {
			lastp->next_pgno = PGNO(pagep);
			pagep->prev_pgno = PGNO(lastp);
			(void)memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
		} else
			*pgnop = PGNO(pagep);
		lastp = pagep;

		/* Advance past the bytes just stored. */
		p += pagespace;
		sz -= pagespace;
	}
	(void)memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
	return (0);
}
/*
 * __db_ioff --
 *	Increment the reference count on an overflow page.
 *
 *	The page is fetched, its reference count bumped, the change logged
 *	when logging is enabled, and the page put back dirty.  Returns 0 on
 *	success or the error from memp_fget or __db_ovref_log.
 *
 * PUBLIC: int __db_ioff __P((DB *, db_pgno_t));
 */
int
__db_ioff(dbp, pgno)
	DB *dbp;
	db_pgno_t pgno;
{
	PAGE *h;
	int ret;

	ret = memp_fget(dbp->mpf, &pgno, 0, &h);
	if (ret != 0) {
		(void)__db_pgerr(dbp, pgno);
		return (ret);
	}

	++OV_REF(h);

	if (DB_LOGGING(dbp)) {
		ret = __db_ovref_log(dbp->dbenv->lg_info,
		    dbp->txn, &LSN(h), 0, dbp->log_fileid, h->pgno, &LSN(h));
		if (ret != 0)
			return (ret);
	}

	(void)memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
	return (0);
}
/*
 * __db_doff --
 *	Delete an offpage chain of overflow pages.
 *
 *	Starting at pgno, each page is fetched, logged (DB_REM_BIG) when
 *	logging is enabled, and handed to freefunc.  If an overflow page
 *	with a reference count above one is reached, the count is
 *	decremented and the walk stops there.  Returns 0 on success or the
 *	first error from memp_fget, __db_big_log or freefunc.
 *
 * PUBLIC: int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
 */
int
__db_doff(dbp, pgno, freefunc)
	DB *dbp;
	db_pgno_t pgno;
	int (*freefunc) __P((DB *, PAGE *));
{
	PAGE *pagep;
	DB_LSN null_lsn;
	DBT tmp_dbt;
	int ret;

	for (;;) {
		ret = memp_fget(dbp->mpf, &pgno, 0, &pagep);
		if (ret != 0) {
			(void)__db_pgerr(dbp, pgno);
			return (ret);
		}

		/*
		 * If it's an overflow page and it's referenced by more than
		 * one key/data item, decrement the reference count and return.
		 */
		if (TYPE(pagep) == P_OVERFLOW && OV_REF(pagep) > 1) {
			--OV_REF(pagep);
			(void)memp_fput(dbp->mpf, pagep, DB_MPOOL_DIRTY);
			return (0);
		}

		if (DB_LOGGING(dbp)) {
			/* Log the page's data so the removal can be undone. */
			tmp_dbt.data = (u_int8_t *)pagep + P_OVERHEAD;
			tmp_dbt.size = OV_LEN(pagep);
			ZERO_LSN(null_lsn);
			ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn,
			    &LSN(pagep), 0, DB_REM_BIG, dbp->log_fileid,
			    PGNO(pagep), PREV_PGNO(pagep), NEXT_PGNO(pagep),
			    &tmp_dbt, &LSN(pagep), &null_lsn, &null_lsn);
			if (ret != 0)
				return (ret);
		}

		/* Remember the successor before the page goes away. */
		pgno = pagep->next_pgno;
		ret = freefunc(dbp, pagep);
		if (ret != 0)
			return (ret);

		if (pgno == PGNO_INVALID)
			break;
	}
	return (0);
}
/*
 * __db_moff --
 *	Match on overflow pages.
 *
 * Compare the key in dbt against the overflow item that starts on page
 * pgno.  Returns 0 when every compared byte matches and both run out
 * together.  On the first differing byte the result is the DBT byte minus
 * the page byte.  If the bytes match but the DBT has bytes left after the
 * chain ends, -1 is returned; if the chain has further pages after the DBT
 * is exhausted, 1 is returned.  A page that cannot be fetched is reported
 * through __db_pgerr and 0 is returned, since there is no system error
 * return.
 *
 * Note that only whole pages are counted when deciding that the page key
 * is longer: extra bytes on the page where the DBT runs out are not
 * examined.
 *
 * PUBLIC: int __db_moff __P((DB *, const DBT *, db_pgno_t));
 */
int
__db_moff(dbp, dbt, pgno)
	DB *dbp;
	const DBT *dbt;
	db_pgno_t pgno;
{
	PAGE *pagep;
	u_int32_t cmp_bytes, key_left;
	int ret;
	u_int8_t *p1, *p2;

	ret = 0;
	p1 = dbt->data;
	key_left = dbt->size;

	/* While there are both keys to compare. */
	while (key_left > 0 && pgno != PGNO_INVALID) {
		if (memp_fget(dbp->mpf, &pgno, 0, &pagep) != 0) {
			(void)__db_pgerr(dbp, pgno);
			return (0);	/* No system error return. */
		}

		/* Compare as many bytes as this page and the DBT share. */
		cmp_bytes =
		    OV_LEN(pagep) < key_left ? OV_LEN(pagep) : key_left;
		key_left -= cmp_bytes;

		p2 = (u_int8_t *)pagep + P_OVERHEAD;
		for (; cmp_bytes > 0; --cmp_bytes, ++p1, ++p2)
			if (*p1 != *p2) {
				ret = (long)*p1 - (long)*p2;
				break;
			}

		pgno = NEXT_PGNO(pagep);
		(void)memp_fput(dbp->mpf, pagep, 0);
		if (ret != 0)
			return (ret);
	}

	if (key_left > 0)		/* DBT is longer than page key. */
		return (-1);
	if (pgno != PGNO_INVALID)	/* DBT is shorter than page key. */
		return (1);
	return (0);
}

785
db2/db/db_pr.c Normal file
View File

@ -0,0 +1,785 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_pr.c 10.14 (Sleepycat) 8/17/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
#include "hash.h"
#include "db_am.h"
static void __db_proff __P((void *));
static void __db_psize __P((DB_MPOOLFILE *));
/*
 * __db_loadme --
 *	Force loading of this file.
 *
 *	The body only calls getpid() and discards the result; the function
 *	exists so that other code has a symbol in this file to reference.
 *
 * PUBLIC: void __db_loadme __P((void));
 */
void
__db_loadme()
{
	(void)getpid();
}
/*
* Output stream used by the printing routines. It is set on the first
* call to __db_prinit and temporarily replaced by __db_dump while it
* writes to a named file.
*/
static FILE *set_fp;
/*
* 64K is the maximum page size, so by default we check for offsets
* larger than that, and, where possible, we refine the test.
*/
#define PSIZE_BOUNDARY (64 * 1024 + 1)
/*
* Current bound used for that check. While it still equals
* PSIZE_BOUNDARY, __db_dump calls __db_psize before printing.
*/
static size_t set_psize = PSIZE_BOUNDARY;
/*
 * __db_prinit --
 *	Initialize tree printing routines.
 *
 *	The first call fixes the output stream to fp, or to stdout when fp
 *	is NULL.  Once a stream has been set, later calls leave it alone.
 *	The stream in effect is returned.
 *
 * PUBLIC: FILE *__db_prinit __P((FILE *));
 */
FILE *
__db_prinit(fp)
	FILE *fp;
{
	if (set_fp != NULL)
		return (set_fp);

	if (fp == NULL)
		set_fp = stdout;
	else
		set_fp = fp;
	return (set_fp);
}
/*
 * __db_dump --
 *	Dump the tree to a file.
 *
 *	When name is non-NULL the output goes to that file, which is
 *	created or truncated, and the previous output stream is restored
 *	afterwards; otherwise the stream established by __db_prinit is
 *	used.  Returns 0 on success or errno if the file cannot be opened.
 *
 * PUBLIC: int __db_dump __P((DB *, char *, int));
 */
int
__db_dump(dbp, name, all)
	DB *dbp;
	char *name;
	int all;
{
	FILE *fp, *save_fp;

	save_fp = NULL;			/* XXX: Shut the compiler up. */

	/* Refine the page-size bound if that has not been done yet. */
	if (set_psize == PSIZE_BOUNDARY)
		__db_psize(dbp->mpf);

	if (name == NULL)
		fp = __db_prinit(NULL);
	else {
		fp = fopen(name, "w");
		if (fp == NULL)
			return (errno);
		/* Redirect the printing routines for the duration. */
		save_fp = set_fp;
		set_fp = fp;
	}

	(void)__db_prdb(dbp);
	if (dbp->type == DB_HASH)
		(void)__db_prhash(dbp);
	else
		(void)__db_prbtree(dbp);
	fprintf(fp, "%s\n", DB_LINE);
	__db_prtree(dbp->mpf, all);

	if (name != NULL) {
		(void)fclose(fp);
		set_fp = save_fp;
	}
	return (0);
}
/*
 * __db_prdb --
 *	Print one line describing the DB handle: the access method name
 *	followed by the names of the flags set in dbp->flags.
 *
 *	Always returns 0.
 *
 * PUBLIC: int __db_prdb __P((DB *));
 */
int
__db_prdb(dbp)
	DB *dbp;
{
	/* Flag bit to printable name; terminated by a zero mask. */
	static const FN flag_names[] = {
		{ DB_AM_DUP,		"duplicates" },
		{ DB_AM_INMEM,		"in-memory" },
		{ DB_AM_LOCKING,	"locking" },
		{ DB_AM_LOGGING,	"logging" },
		{ DB_AM_MLOCAL,		"local mpool" },
		{ DB_AM_PGDEF,		"default page size" },
		{ DB_AM_RDONLY,		"read-only" },
		{ DB_AM_RECOVER,	"recover" },
		{ DB_AM_SWAP,		"needswap" },
		{ DB_AM_THREAD,		"thread" },
		{ DB_BT_RECNUM,		"btree:records" },
		{ DB_HS_DIRTYMETA,	"hash:dirty-meta" },
		{ DB_RE_DELIMITER,	"recno:delimiter" },
		{ DB_RE_FIXEDLEN,	"recno:fixed-length" },
		{ DB_RE_PAD,		"recno:pad" },
		{ DB_RE_RENUMBER,	"recno:renumber" },
		{ DB_RE_SNAPSHOT,	"recno:snapshot" },
		{ 0 },
	};
	const char *type_name;
	FILE *out;

	out = __db_prinit(NULL);

	if (dbp->type == DB_BTREE)
		type_name = "btree";
	else if (dbp->type == DB_HASH)
		type_name = "hash";
	else if (dbp->type == DB_RECNO)
		type_name = "recno";
	else
		type_name = "UNKNOWN";

	fprintf(out, "%s ", type_name);
	__db_prflags(dbp->flags, flag_names);
	fprintf(out, "\n");

	return (0);
}
/*
 * __db_prbtree --
 *	Print out the btree internal information: the on-page metadata
 *	(read under a read lock on the metadata page), followed by the
 *	in-memory BTREE structure, the recno fields if present, and the
 *	current page stack.
 *
 *	Returns 0 on success, or the error from __bam_lget()/__bam_pget().
 *
 * PUBLIC: int __db_prbtree __P((DB *));
 */
int
__db_prbtree(dbp)
	DB *dbp;
{
	static const FN mfn[] = {
		{ BTM_DUP,	"duplicates" },
		{ BTM_RECNO,	"recno" },
		{ 0 },
	};
	BTMETA *mp;
	BTREE *t;
	DB_LOCK lock;
	EPG *sp;
	FILE *fp;
	RECNO *rp;
	db_pgno_t i;
	int ret;

	t = dbp->internal;
	fp = __db_prinit(NULL);

	(void)fprintf(fp, "%s\nOn-page metadata:\n", DB_LINE);
	i = PGNO_METADATA;

	if ((ret = __bam_lget(dbp, 0, PGNO_METADATA, DB_LOCK_READ, &lock)) != 0)
		return (ret);

	if ((ret = __bam_pget(dbp, (PAGE **)&mp, &i, 0)) != 0) {
		/* Don't leave the metadata page locked on failure. */
		(void)__bam_lput(dbp, lock);
		return (ret);
	}

	(void)fprintf(fp, "magic %#lx\n", (u_long)mp->magic);
	(void)fprintf(fp, "version %lu\n", (u_long)mp->version);
	(void)fprintf(fp, "pagesize %lu\n", (u_long)mp->pagesize);
	(void)fprintf(fp, "maxkey: %lu minkey: %lu\n",
	    (u_long)mp->maxkey, (u_long)mp->minkey);
	(void)fprintf(fp, "free %lu\n", (u_long)mp->free);
	(void)fprintf(fp, "flags %lu", (u_long)mp->flags);
	__db_prflags(mp->flags, mfn);
	(void)fprintf(fp, "\n");

	/* Done with the on-page copy: unpin the page, then drop the lock. */
	(void)memp_fput(dbp->mpf, mp, 0);
	(void)__bam_lput(dbp, lock);

	(void)fprintf(fp, "%s\nDB_INFO:\n", DB_LINE);
	(void)fprintf(fp, "bt_maxkey: %lu bt_minkey: %lu\n",
	    (u_long)t->bt_maxkey, (u_long)t->bt_minkey);
	(void)fprintf(fp, "bt_compare: %#lx bt_prefix: %#lx\n",
	    (u_long)t->bt_compare, (u_long)t->bt_prefix);

	/* Recno-specific fields are only present for recno trees. */
	if ((rp = t->bt_recno) != NULL) {
		(void)fprintf(fp,
		    "re_delim: %#lx re_pad: %#lx re_len: %lu re_source: %s\n",
		    (u_long)rp->re_delim, (u_long)rp->re_pad,
		    (u_long)rp->re_len,
		    rp->re_source == NULL ? "" : rp->re_source);
		(void)fprintf(fp,
		    "cmap: %#lx smap: %#lx emap: %#lx msize: %lu\n",
		    (u_long)rp->re_cmap, (u_long)rp->re_smap,
		    (u_long)rp->re_emap, (u_long)rp->re_msize);
	}

	(void)fprintf(fp, "stack:");
	for (sp = t->bt_stack; sp < t->bt_sp; ++sp)
		(void)fprintf(fp, " %lu", (u_long)sp->page->pgno);
	(void)fprintf(fp, "\n");

	(void)fprintf(fp, "ovflsize: %lu\n", (u_long)t->bt_ovflsize);
	(void)fflush(fp);
	return (0);
}
/*
 * __db_prhash --
 *	Print the hash access method's counters followed by the fields of
 *	the hash header page.
 *
 *	If the header is not already held in t->hdr it is fetched for the
 *	duration of the call and released (and t->hdr reset to NULL) before
 *	returning.
 *
 *	Returns 0 on success, or the memp_fget() error.
 *
 * PUBLIC: int __db_prhash __P((DB *));
 */
int
__db_prhash(dbp)
	DB *dbp;
{
	FILE *out;
	HTAB *t;
	db_pgno_t pgno;
	int n, put_page, ret;

	t = dbp->internal;
	out = __db_prinit(NULL);

	/* In-memory statistics. */
	fprintf(out, "\thash_accesses %lu\n", (u_long)t->hash_accesses);
	fprintf(out, "\thash_collisions %lu\n", (u_long)t->hash_collisions);
	fprintf(out, "\thash_expansions %lu\n", (u_long)t->hash_expansions);
	fprintf(out, "\thash_overflows %lu\n", (u_long)t->hash_overflows);
	fprintf(out, "\thash_bigpages %lu\n", (u_long)t->hash_bigpages);
	fprintf(out, "\n");

	/* Borrow the header page if nobody is holding it for us. */
	put_page = 0;
	if (t->hdr == NULL) {
		pgno = PGNO_METADATA;
		ret = memp_fget(dbp->mpf, &pgno, 0, &t->hdr);
		if (ret != 0)
			return (ret);
		put_page = 1;
	}

	/* Header page contents. */
	fprintf(out, "\tmagic %#lx\n", (u_long)t->hdr->magic);
	fprintf(out, "\tversion %lu\n", (u_long)t->hdr->version);
	fprintf(out, "\tpagesize %lu\n", (u_long)t->hdr->pagesize);
	fprintf(out, "\tovfl_point %lu\n", (u_long)t->hdr->ovfl_point);
	fprintf(out, "\tlast_freed %lu\n", (u_long)t->hdr->last_freed);
	fprintf(out, "\tmax_bucket %lu\n", (u_long)t->hdr->max_bucket);
	fprintf(out, "\thigh_mask %#lx\n", (u_long)t->hdr->high_mask);
	fprintf(out, "\tlow_mask %#lx\n", (u_long)t->hdr->low_mask);
	fprintf(out, "\tffactor %lu\n", (u_long)t->hdr->ffactor);
	fprintf(out, "\tnelem %lu\n", (u_long)t->hdr->nelem);
	fprintf(out, "\th_charkey %#lx\n", (u_long)t->hdr->h_charkey);

	n = 0;
	while (n < NCACHED) {
		fprintf(out, "%lu ", (u_long)t->hdr->spares[n]);
		++n;
	}
	fprintf(out, "\n");

	(void)fflush(out);

	/* Give the header back only if it was fetched here. */
	if (put_page) {
		(void)memp_fput(dbp->mpf, (PAGE *)t->hdr, 0);
		t->hdr = NULL;
	}
	return (0);
}
/*
 * __db_prtree --
 *	Print out the entire tree: every page from PGNO_ROOT upwards that is
 *	not of type P_INVALID is passed to __db_prpage().
 *
 *	The walk has no explicit upper bound; it stops at the first page
 *	memp_fget() cannot return, which is treated as the end of the file.
 *
 *	Returns the first non-zero value returned by __db_prpage(), if any.
 *	If not even the first page could be fetched, the memp_fget() error
 *	is returned.  Otherwise returns 0.
 *
 * PUBLIC: int __db_prtree __P((DB_MPOOLFILE *, int));
 */
int
__db_prtree(mpf, all)
	DB_MPOOLFILE *mpf;
	int all;
{
	PAGE *h;
	db_pgno_t i;
	int get_ret, ret, t_ret;

	if (set_psize == PSIZE_BOUNDARY)
		__db_psize(mpf);

	ret = 0;
	get_ret = 0;
	for (i = PGNO_ROOT;; ++i) {
		/*
		 * Keep the fetch status apart from ret.  Storing it in ret
		 * would discard any page-print error already recorded, and
		 * would make the function fail on every run because the
		 * loop can only end on a failed fetch.
		 */
		if ((get_ret = memp_fget(mpf, &i, 0, &h)) != 0)
			break;
		if (TYPE(h) != P_INVALID)
			if ((t_ret = __db_prpage(h, all)) != 0 && ret == 0)
				ret = t_ret;
		(void)memp_fput(mpf, h, 0);
	}

	/* Nothing at all could be read: that is a real failure. */
	if (i == PGNO_ROOT)
		ret = get_ret;

	(void)fflush(__db_prinit(NULL));
	return (ret);
}
/*
 * __db_prnpage
 *	-- Fetch one page by number and print it, items included.
 *
 *	Returns the memp_fget() error if the page cannot be fetched,
 *	otherwise whatever __db_prpage() returned.
 *
 * PUBLIC: int __db_prnpage __P((DB_MPOOLFILE *, db_pgno_t));
 */
int
__db_prnpage(mpf, pgno)
	DB_MPOOLFILE *mpf;
	db_pgno_t pgno;
{
	PAGE *pg;
	int ret;

	if (set_psize == PSIZE_BOUNDARY)
		__db_psize(mpf);

	ret = memp_fget(mpf, &pgno, 0, &pg);
	if (ret != 0)
		return (ret);

	/* Always print the page in full. */
	ret = __db_prpage(pg, 1);
	(void)fflush(__db_prinit(NULL));

	(void)memp_fput(mpf, pg, 0);
	return (ret);
}
/*
* __db_prpage
* -- Print out a page.
*
* Writes a description of page h to the stream returned by __db_prinit():
* a header (page number, type name, LSN and the type-specific header
* fields) and, when `all' is non-zero, one line per item on the page.
*
* Returns 0 on success, 1 if the page type is not recognized, or EINVAL
* if any item had an illegal offset or type.  Printing continues past
* such items, so EINVAL means "at least one bad item".
*
* PUBLIC: int __db_prpage __P((PAGE *, int));
*/
int
__db_prpage(h, all)
PAGE *h;
int all;
{
BINTERNAL *bi;
BKEYDATA *bk;
HKEYDATA *hkd;
HOFFPAGE a_hkd;
FILE *fp;
RINTERNAL *ri;
db_indx_t dlen, len, i;
db_pgno_t pgno;
u_int8_t *p;
int deleted, ret;
const char *s;
bi = NULL; /* XXX: Shut the compiler up. */
bk = NULL;
hkd = NULL;
ri = NULL;
fp = __db_prinit(NULL);
/* Map the page type to a printable name; reject unknown types. */
switch (TYPE(h)) {
case P_DUPLICATE:
s = "duplicate";
break;
case P_HASH:
s = "hash";
break;
case P_IBTREE:
s = "btree internal";
break;
case P_INVALID:
s = "invalid";
break;
case P_IRECNO:
s = "recno internal";
break;
case P_LBTREE:
s = "btree leaf";
break;
case P_LRECNO:
s = "recno leaf";
break;
case P_OVERFLOW:
s = "overflow";
break;
default:
fprintf(fp, "ILLEGAL PAGE TYPE: page: %lu type: %lu\n",
(u_long)h->pgno, (u_long)TYPE(h));
return (1);
}
/* Page header: fields common to all types first, then per-type extras. */
fprintf(fp, "page %4lu: (%s)\n", (u_long)h->pgno, s);
fprintf(fp, " lsn.file: %lu lsn.offset: %lu",
(u_long)LSN(h).file, (u_long)LSN(h).offset);
/* The record count is printed for internal pages and a recno leaf root. */
if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO ||
(TYPE(h) == P_LRECNO && h->pgno == PGNO_ROOT))
fprintf(fp, " total records: %4lu", (u_long)RE_NREC(h));
fprintf(fp, "\n");
if (TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO)
fprintf(fp, " prev: %4lu next: %4lu",
(u_long)PREV_PGNO(h), (u_long)NEXT_PGNO(h));
if (TYPE(h) == P_IBTREE || TYPE(h) == P_LBTREE)
fprintf(fp, " level: %2lu", (u_long)h->level);
/*
* Overflow pages have no item index: print the reference count and
* the data that follows the page header, and stop.
*/
if (TYPE(h) == P_OVERFLOW) {
fprintf(fp, " ref cnt: %4lu ", (u_long)OV_REF(h));
__db_pr((u_int8_t *)h + P_OVERHEAD, OV_LEN(h));
return (0);
}
fprintf(fp, " entries: %4lu", (u_long)NUM_ENT(h));
fprintf(fp, " offset: %4lu\n", (u_long)HOFFSET(h));
/* Header only, unless the caller asked for the items as well. */
if (!all || TYPE(h) == P_INVALID)
return (0);
ret = 0;
for (i = 0; i < NUM_ENT(h); i++) {
/*
* Sanity check the item's location before dereferencing it: it must
* lie beyond the page header and below the page size bound held in
* set_psize.
*/
if (P_ENTRY(h, i) - (u_int8_t *)h < P_OVERHEAD ||
(size_t)(P_ENTRY(h, i) - (u_int8_t *)h) >= set_psize) {
fprintf(fp,
"ILLEGAL PAGE OFFSET: indx: %lu of %lu\n",
(u_long)i, (u_long)h->inp[i]);
ret = EINVAL;
continue;
}
deleted = 0;
/* First pass over the type: locate the item and its deleted flag. */
switch (TYPE(h)) {
case P_HASH:
hkd = GET_HKEYDATA(h, i);
break;
case P_IBTREE:
bi = GET_BINTERNAL(h, i);
break;
case P_IRECNO:
ri = GET_RINTERNAL(h, i);
break;
case P_LBTREE:
bk = GET_BKEYDATA(h, i);
/*
* Even-indexed items take their deleted flag from the item O_INDX
* slots later (presumably the data item paired with a key -- confirm
* against the btree leaf layout).
* NOTE(review): that index is not checked against NUM_ENT(h) here.
*/
deleted = i % 2 == 0 &&
GET_BKEYDATA(h, i + O_INDX)->deleted;
break;
case P_LRECNO:
case P_DUPLICATE:
bk = GET_BKEYDATA(h, i);
deleted = GET_BKEYDATA(h, i)->deleted;
break;
default:
fprintf(fp,
"ILLEGAL PAGE ITEM: %lu\n", (u_long)TYPE(h));
ret = EINVAL;
continue;
}
/* Item prefix: deleted marker, index, and raw offset from the index. */
fprintf(fp, " %s[%03lu] %4lu ",
deleted ? "D" : " ", (u_long)i, (u_long)h->inp[i]);
/* Second pass over the type: print the item's contents. */
switch (TYPE(h)) {
case P_HASH:
/*
* This switch has no default case: an unrecognized hash item type
* prints nothing after the prefix and is not reported as an error.
*/
switch (hkd->type) {
case H_OFFDUP:
/* Copy the page number out rather than reading it in place. */
memcpy(&pgno,
(u_int8_t *)hkd + SSZ(HOFFDUP, pgno),
sizeof(db_pgno_t));
fprintf(fp,
"%4lu [offpage dups]\n", (u_long)pgno);
break;
case H_DUPLICATE:
/*
* If this is the first item on a page, then
* we cannot figure out how long it is, so
* we only print the first one in the duplicate
* set.
*/
if (i != 0)
len = LEN_HKEYDATA(h, 0, i);
else
len = 1;
fprintf(fp, "Duplicates:\n");
/*
* Each element is read as a db_indx_t length followed by that many
* data bytes; a further sizeof(db_indx_t) bytes are then skipped
* (presumably a trailing copy of the length -- confirm).
*/
for (p = hkd->data; p < hkd->data + len;) {
memcpy(&dlen, p, sizeof(db_indx_t));
p += sizeof(db_indx_t);
fprintf(fp, "\t\t");
__db_pr(p, dlen);
p += sizeof(db_indx_t) + dlen;
}
break;
case H_KEYDATA:
/*
* As above, the length of item 0 is not computed; it is printed
* with %s instead.
* NOTE(review): %s relies on a NUL byte terminating the data.
*/
if (i != 0)
__db_pr(hkd->data,
LEN_HKEYDATA(h, 0, i));
else
fprintf(fp, "%s\n", hkd->data);
break;
case H_OFFPAGE:
/* Work on a local copy of the off-page reference. */
memcpy(&a_hkd, hkd, HOFFPAGE_SIZE);
fprintf(fp,
"overflow: total len: %4lu page: %4lu\n",
(u_long)a_hkd.tlen, (u_long)a_hkd.pgno);
break;
}
break;
case P_IBTREE:
fprintf(fp, "count: %4lu pgno: %4lu ",
(u_long)bi->nrecs, (u_long)bi->pgno);
switch (bi->type) {
case B_KEYDATA:
__db_pr(bi->data, bi->len);
break;
case B_DUPLICATE:
case B_OVERFLOW:
/* The key is an off-page reference stored in the data area. */
__db_proff(bi->data);
break;
default:
fprintf(fp, "ILLEGAL BINTERNAL TYPE: %lu\n",
(u_long)bi->type);
ret = EINVAL;
break;
}
break;
case P_IRECNO:
fprintf(fp, "entries %4lu pgno %4lu\n",
(u_long)ri->nrecs, (u_long)ri->pgno);
break;
case P_LBTREE:
case P_LRECNO:
case P_DUPLICATE:
switch (bk->type) {
case B_KEYDATA:
__db_pr(bk->data, bk->len);
break;
case B_DUPLICATE:
case B_OVERFLOW:
/* The item itself is handed to __db_proff as an off-page reference. */
__db_proff(bk);
break;
default:
fprintf(fp,
"ILLEGAL DUPLICATE/LBTREE/LRECNO TYPE: %lu\n",
(u_long)bk->type);
ret = EINVAL;
break;
}
break;
}
}
(void)fflush(fp);
return (ret);
}
/*
* __db_isbad
* -- Decide if a page is corrupted.
*
* PUBLIC: int __db_isbad __P((PAGE *, int));
*/
int
__db_isbad(h, die)
PAGE *h;
int die;
{
BINTERNAL *bi;
BKEYDATA *bk;
HKEYDATA *hkd;
FILE *fp;
db_indx_t i;
bi = NULL; /* XXX: Shut the compiler up. */
bk = NULL;
hkd = NULL;
fp = __db_prinit(NULL);
switch (TYPE(h)) {
case P_DUPLICATE:
case P_HASH:
case P_IBTREE:
case P_INVALID:
case P_IRECNO:
case P_LBTREE:
case P_LRECNO:
case P_OVERFLOW:
break;
default:
fprintf(fp, "ILLEGAL PAGE TYPE: page: %lu type: %lu\n",
(u_long)h->pgno, (u_long)TYPE(h));
goto bad;
}
for (i = 0; i < NUM_ENT(h); i++) {
if (P_ENTRY(h, i) - (u_int8_t *)h < P_OVERHEAD ||
(size_t)(P_ENTRY(h, i) - (u_int8_t *)h) >= set_psize) {
fprintf(fp,
"ILLEGAL PAGE OFFSET: indx: %lu of %lu\n",
(u_long)i, (u_long)h->inp[i]);
goto bad;
}
switch (TYPE(h)) {
case P_HASH:
hkd = GET_HKEYDATA(h, i);
if (hkd->type != H_OFFDUP &&
hkd->type != H_DUPLICATE &&
hkd->type != H_KEYDATA &&
hkd->type != H_OFFPAGE) {
fprintf(fp, "ILLEGAL HASH TYPE: %lu\n",
(u_long)hkd->type);
goto bad;
}
break;
case P_IBTREE:
bi = GET_BINTERNAL(h, i);
if (bi->type != B_KEYDATA &&
bi->type != B_DUPLICATE &&
bi->type != B_OVERFLOW) {
fprintf(fp, "ILLEGAL BINTERNAL TYPE: %lu\n",
(u_long)bi->type);
goto bad;
}
break;
case P_IRECNO:
case P_LBTREE:
case P_LRECNO:
break;
case P_DUPLICATE:
bk = GET_BKEYDATA(h, i);
if (bk->type != B_KEYDATA &&
bk->type != B_DUPLICATE &&
bk->type != B_OVERFLOW) {
fprintf(fp,
"ILLEGAL DUPLICATE/LBTREE/LRECNO TYPE: %lu\n",
(u_long)bk->type);
goto bad;
}
break;
default:
fprintf(fp,
"ILLEGAL PAGE ITEM: %lu\n", (u_long)TYPE(h));
goto bad;
}
}
return (0);
bad: if (die) {
abort();
/* NOTREACHED */
}
return (1);
}
/*
* __db_pr --
* Print out a data element.
*
* PUBLIC: void __db_pr __P((u_int8_t *, u_int32_t));
*/
void
__db_pr(p, len)
u_int8_t *p;
u_int32_t len;
{
FILE *fp;
int i, lastch;
fp = __db_prinit(NULL);
fprintf(fp, "len: %3lu", (u_long)len);
lastch = '.';
if (len != 0) {
fprintf(fp, " data: ");
for (i = len <= 20 ? len : 20; i > 0; --i, ++p) {
lastch = *p;
if (isprint(*p) || *p == '\n')
fprintf(fp, "%c", *p);
else
fprintf(fp, "%#x", (u_int)*p);
}
if (len > 20) {
fprintf(fp, "...");
lastch = '.';
}
}
if (lastch != '\n')
fprintf(fp, "\n");
}
/*
* __db_proff --
* Print out an off-page element.
*/
static void
__db_proff(vp)
void *vp;
{
FILE *fp;
BOVERFLOW *p;
fp = __db_prinit(NULL);
p = vp;
switch (p->type) {
case B_OVERFLOW:
fprintf(fp, "overflow: total len: %4lu page: %4lu\n",
(u_long)p->tlen, (u_long)p->pgno);
break;
case B_DUPLICATE:
fprintf(fp, "duplicate: page: %4lu\n", (u_long)p->pgno);
break;
}
}
/*
 * __db_prflags --
 *	Print the names of the flags that are set, as " (a, b, c)".
 *
 *	fn is a table of mask/name pairs terminated by an entry whose mask
 *	is 0.  Nothing at all is printed when no listed flag is set.
 *
 * PUBLIC: void __db_prflags __P((u_int32_t, const FN *));
 */
void
__db_prflags(flags, fn)
	u_int32_t flags;
	FN const *fn;
{
	FILE *out;
	const FN *entry;
	const char *sep;
	int found;

	out = __db_prinit(NULL);

	found = 0;
	sep = " (";			/* Opens the list on first match. */
	for (entry = fn; entry->mask != 0; ++entry) {
		if ((entry->mask & flags) == 0)
			continue;

		fprintf(out, "%s%s", sep, entry->name);
		sep = ", ";
		found = 1;
	}

	/* Close the list only if it was ever opened. */
	if (found)
		fprintf(out, ")");
}
/*
 * __db_psize --
 *	Set set_psize from the metadata page.
 *
 *	set_psize is first lowered to PSIZE_BOUNDARY - 1, so callers that
 *	test for PSIZE_BOUNDARY will not call again.  It is then replaced
 *	by the page size recorded on the metadata page, if that page can
 *	be read and carries a btree or hash magic number.
 */
static void
__db_psize(mpf)
	DB_MPOOLFILE *mpf;
{
	BTMETA *meta;
	db_pgno_t pgno;

	set_psize = PSIZE_BOUNDARY - 1;

	pgno = PGNO_METADATA;
	if (memp_fget(mpf, &pgno, 0, &meta) != 0)
		return;

	if (meta->magic == DB_BTREEMAGIC || meta->magic == DB_HASHMAGIC)
		set_psize = meta->pagesize;

	(void)memp_fput(mpf, meta, 0);
}

623
db2/db/db_rec.c Normal file
View File

@ -0,0 +1,623 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_rec.c 10.8 (Sleepycat) 8/22/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#endif
#include <ctype.h>
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "db_int.h"
#include "shqueue.h"
#include "db_page.h"
#include "db_dispatch.h"
#include "log.h"
#include "hash.h"
#include "btree.h"
/*
 * PUBLIC: int __db_addrem_recover
 * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 *
 * This log message is generated whenever we add or remove a duplicate
 * to/from a duplicate page.  On recover, we just do the opposite.
 *
 * The page is changed only when its LSN shows the change is needed: for
 * redo the page LSN must equal the LSN logged as the pre-image
 * (argp->pagelsn), for undo it must equal this record's LSN (*lsnp).
 *
 * On success *lsnp is set to argp->prev_lsn.  The return value is
 * produced by REC_CLOSE (defined elsewhere; presumably it returns ret).
 */
int
__db_addrem_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__db_addrem_args *argp;
	DB *file_dbp, *mdbp;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	int change, cmp_n, cmp_p, ret;

	REC_PRINT(__db_addrem_print);
	REC_INTRO(__db_addrem_read);

	if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
		if (!redo) {
			/*
			 * We are undoing and the page doesn't exist.  That
			 * is equivalent to having a pagelsn of 0, so we
			 * would not have to undo anything.  In this case,
			 * don't bother creating a page.
			 */
			*lsnp = argp->prev_lsn;
			ret = 0;
			goto out;
		} else
			if ((ret = memp_fget(mpf,
			    &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
				goto out;
	}

	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
	change = 0;
	if ((cmp_p == 0 && redo && argp->opcode == DB_ADD_DUP) ||
	    (cmp_n == 0 && !redo && argp->opcode == DB_REM_DUP)) {

		/* Need to redo an add, or undo a delete. */
		if ((ret = __db_pitem(file_dbp, pagep, argp->indx, argp->nbytes,
		    argp->hdr.size == 0 ? NULL : &argp->hdr,
		    argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0) {
			/* Don't leave the page pinned on the error path. */
			(void)memp_fput(mpf, pagep, 0);
			goto out;
		}

		change = DB_MPOOL_DIRTY;

	} else if ((cmp_n == 0 && !redo && argp->opcode == DB_ADD_DUP) ||
	    (cmp_p == 0 && redo && argp->opcode == DB_REM_DUP)) {
		/* Need to undo an add, or redo a delete. */
		if ((ret = __db_ditem(file_dbp, pagep, argp->indx,
		    argp->nbytes)) != 0) {
			/* Don't leave the page pinned on the error path. */
			(void)memp_fput(mpf, pagep, 0);
			goto out;
		}
		change = DB_MPOOL_DIRTY;
	}

	/* Stamp the page with the LSN that matches its new contents. */
	if (change) {
		if (redo)
			LSN(pagep) = *lsnp;
		else
			LSN(pagep) = argp->pagelsn;
	}

	if ((ret = memp_fput(mpf, pagep, change)) == 0)
		*lsnp = argp->prev_lsn;

out:	REC_CLOSE;
}
/*
 * __db_split_recover --
 *	Recovery function for page-split log records: restores a logged
 *	page image over the page when the page's LSN shows it is needed.
 *
 *	On success *lsnp is set to argp->prev_lsn.  The return value is
 *	produced by REC_CLOSE (defined elsewhere; presumably it returns
 *	ret).
 *
 * PUBLIC: int __db_split_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__db_split_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__db_split_args *argp;
	DB *file_dbp, *mdbp;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	int change, cmp_n, cmp_p, ret;

	REC_PRINT(__db_split_print);
	REC_INTRO(__db_split_read);

	ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
	if (ret != 0) {
		if (!redo) {
			/*
			 * Undo of a page that was never written: the same
			 * as a page LSN of 0, so nothing needs undoing and
			 * there is no reason to create the page.
			 */
			*lsnp = argp->prev_lsn;
			ret = 0;
			goto out;
		}

		/* Redo needs the page to exist; create it. */
		ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep);
		if (ret != 0)
			goto out;
	}

	/*
	 * Two kinds of record share this function: SPLITOLD carries the
	 * original image of the page that was split and is used for undo;
	 * SPLITNEW carries the image of a page created by the split and is
	 * used for redo.  A redo should never find the SPLITOLD record on
	 * disk without the SPLITNEW one, so only NEW records are redone and
	 * only OLD records are undone.  The LSN is updated either way.
	 */
	change = 0;
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);

	if (redo) {
		if (cmp_p == 0) {
			/* Redo the split described. */
			if (argp->opcode == DB_SPLITNEW)
				memcpy(pagep, argp->pageimage.data,
				    argp->pageimage.size);
			LSN(pagep) = *lsnp;
			change = DB_MPOOL_DIRTY;
		}
	} else if (cmp_n == 0) {
		/* Put back the old image. */
		if (argp->opcode == DB_SPLITOLD)
			memcpy(pagep, argp->pageimage.data,
			    argp->pageimage.size);
		LSN(pagep) = argp->pagelsn;
		change = DB_MPOOL_DIRTY;
	}

	ret = memp_fput(mpf, pagep, change);
	if (ret == 0)
		*lsnp = argp->prev_lsn;

out:	REC_CLOSE;
}
/*
* __db_big_recover --
* Recovery function for DB_ADD_BIG / DB_REM_BIG log records.
*
* Up to three pages are examined in turn: the page named in the record
* (argp->pgno), then argp->prev_pgno and argp->next_pgno when they are not
* PGNO_INVALID.  Each is changed only if its LSN shows the change is needed.
*
* On success *lsnp is set to argp->prev_lsn.  The return value is produced
* by REC_CLOSE (defined elsewhere; presumably it returns ret).
*
* PUBLIC: int __db_big_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
int
__db_big_recover(logp, dbtp, lsnp, redo, info)
DB_LOG *logp;
DBT *dbtp;
DB_LSN *lsnp;
int redo;
void *info;
{
__db_big_args *argp;
DB *file_dbp, *mdbp;
DB_MPOOLFILE *mpf;
PAGE *pagep;
int change, cmp_n, cmp_p, ret;
REC_PRINT(__db_big_print);
REC_INTRO(__db_big_read);
if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (!redo) {
/*
* We are undoing and the page doesn't exist. That
* is equivalent to having a pagelsn of 0, so we
* would not have to undo anything. In this case,
* don't bother creating a page.
*/
ret = 0;
/* The neighbouring pages may still need attention. */
goto ppage;
} else
if ((ret = memp_fget(mpf,
&argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
}
/*
* There are three pages we need to check. The one on which we are
* adding data, the previous one whose next_pointer may have
* been updated, and the next one whose prev_pointer may have
* been updated.
*/
/* cmp_n == 0: page carries this record's LSN; cmp_p == 0: the pre-image LSN. */
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
change = 0;
if ((cmp_p == 0 && redo && argp->opcode == DB_ADD_BIG) ||
(cmp_n == 0 && !redo && argp->opcode == DB_REM_BIG)) {
/* We are either redo-ing an add, or undoing a delete. */
/* Rebuild the overflow page from the logged data. */
P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
argp->next_pgno, 0, P_OVERFLOW);
OV_LEN(pagep) = argp->dbt.size;
OV_REF(pagep) = 1;
memcpy((u_int8_t *)pagep + P_OVERHEAD, argp->dbt.data,
argp->dbt.size);
PREV_PGNO(pagep) = argp->prev_pgno;
change = DB_MPOOL_DIRTY;
} else if ((cmp_n == 0 && !redo && argp->opcode == DB_ADD_BIG) ||
(cmp_p == 0 && redo && argp->opcode == DB_REM_BIG)) {
/*
* We are either undo-ing an add or redo-ing a delete.
* The page is about to be reclaimed in either case, so
* there really isn't anything to do here.
*/
change = DB_MPOOL_DIRTY;
}
/* A changed page gets the LSN matching the direction of recovery. */
if (change)
LSN(pagep) = redo ? *lsnp : argp->pagelsn;
if ((ret = memp_fput(mpf, pagep, change)) != 0)
goto out;
/* Now check the previous page. */
ppage: if (argp->prev_pgno != PGNO_INVALID) {
change = 0;
if ((ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep)) != 0)
if (!redo) {
/*
* We are undoing and the page doesn't exist.
* That is equivalent to having a pagelsn of 0,
* so we would not have to undo anything. In
* this case, don't bother creating a page.
*/
*lsnp = argp->prev_lsn;
ret = 0;
goto npage;
} else
if ((ret = memp_fget(mpf, &argp->prev_pgno,
DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->prevlsn);
if ((cmp_p == 0 && redo && argp->opcode == DB_ADD_BIG) ||
(cmp_n == 0 && !redo && argp->opcode == DB_REM_BIG)) {
/* Redo add, undo delete. */
NEXT_PGNO(pagep) = argp->pgno;
change = DB_MPOOL_DIRTY;
} else if ((cmp_n == 0 &&
!redo && argp->opcode == DB_ADD_BIG) ||
(cmp_p == 0 && redo && argp->opcode == DB_REM_BIG)) {
/* Redo delete, undo add. */
NEXT_PGNO(pagep) = argp->next_pgno;
change = DB_MPOOL_DIRTY;
}
if (change)
LSN(pagep) = redo ? *lsnp : argp->prevlsn;
if ((ret = memp_fput(mpf, pagep, change)) != 0)
goto out;
}
/* Now check the next page. Can only be set on a delete. */
npage: if (argp->next_pgno != PGNO_INVALID) {
change = 0;
if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0)
if (!redo) {
/*
* We are undoing and the page doesn't exist.
* That is equivalent to having a pagelsn of 0,
* so we would not have to undo anything. In
* this case, don't bother creating a page.
*/
*lsnp = argp->prev_lsn;
ret = 0;
goto out;
} else
if ((ret = memp_fget(mpf, &argp->next_pgno,
DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
/* The opcode is not tested here, in line with the comment at npage. */
if (cmp_p == 0 && redo) {
PREV_PGNO(pagep) = PGNO_INVALID;
change = DB_MPOOL_DIRTY;
} else if (cmp_n == 0 && !redo) {
PREV_PGNO(pagep) = argp->pgno;
change = DB_MPOOL_DIRTY;
}
if (change)
LSN(pagep) = redo ? *lsnp : argp->nextlsn;
if ((ret = memp_fput(mpf, pagep, change)) != 0)
goto out;
}
/* All pages handled: step back to the previous record. */
*lsnp = argp->prev_lsn;
out: REC_CLOSE;
}
/*
 * __db_ovref_recover --
 *	Recovery function for __db_ioff().
 *
 *	Redo increments the overflow page's reference count when the page
 *	still carries the LSN logged as its pre-image (argp->lsn).  Undo
 *	decrements it when the page carries this record's LSN (*lsnp).
 *
 *	*lsnp is set to argp->prev_lsn once the page has been fetched.  The
 *	return value is produced by REC_CLOSE (defined elsewhere; presumably
 *	it returns ret).
 *
 * PUBLIC: int __db_ovref_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__db_ovref_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__db_ovref_args *argp;
	DB *file_dbp, *mdbp;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	int modified, ret;

	REC_PRINT(__db_ovref_print);
	REC_INTRO(__db_ovref_read);

	if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
		(void)__db_pgerr(file_dbp, argp->pgno);
		goto out;
	}

	modified = 0;
	/*
	 * The redo test must look at the page's LSN.  Comparing the record's
	 * own LSN with the pre-image LSN can never match, so the update
	 * would never be redone.
	 */
	if (log_compare(&LSN(pagep), &argp->lsn) == 0 && redo) {
		/* Need to redo update described. */
		++OV_REF(pagep);

		pagep->lsn = *lsnp;
		modified = 1;
	} else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) {
		/* Need to undo update described. */
		--OV_REF(pagep);

		pagep->lsn = argp->lsn;
		modified = 1;
	}
	ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0);

	*lsnp = argp->prev_lsn;

out:	REC_CLOSE;
}
/*
 * __db_relink_recover --
 *	Recovery function for relink.
 *
 *	Three pages are examined in turn: the relinked page itself
 *	(argp->pgno), then argp->next and argp->prev.  On redo a page is
 *	changed only if it still carries the LSN logged as its pre-image;
 *	on undo only if it carries this record's LSN (*lsnp).
 *
 *	A page that cannot be fetched is an error during redo and is
 *	skipped during undo.
 *
 *	On success *lsnp is set to argp->prev_lsn.  The return value is
 *	produced by REC_CLOSE (defined elsewhere; presumably it returns
 *	ret).
 *
 * PUBLIC: int __db_relink_recover
 * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__db_relink_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__db_relink_args *argp;
	DB *file_dbp, *mdbp;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	int modified, ret;

	REC_PRINT(__db_relink_print);
	REC_INTRO(__db_relink_read);

	/*
	 * There are three pages we need to check -- the page, and the
	 * previous and next pages, if they existed.
	 *
	 * Each redo test below compares the page's LSN with its logged
	 * pre-image LSN.  Comparing the record's own LSN with the pre-image
	 * can never match, so nothing would ever be redone.
	 */
	if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
		if (redo) {
			(void)__db_pgerr(file_dbp, argp->pgno);
			goto out;
		}
		goto next;
	}
	modified = 0;
	if (log_compare(&LSN(pagep), &argp->lsn) == 0 && redo) {
		/* Redo the relink. */
		pagep->lsn = *lsnp;
		modified = 1;
	} else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) {
		/* Undo the relink. */
		pagep->next_pgno = argp->next;
		pagep->prev_pgno = argp->prev;

		pagep->lsn = argp->lsn;
		modified = 1;
	}
	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
		(void)__db_panic(file_dbp);
		goto out;
	}

next:	if ((ret = memp_fget(mpf, &argp->next, 0, &pagep)) != 0) {
		if (redo) {
			(void)__db_pgerr(file_dbp, argp->next);
			goto out;
		}
		goto prev;
	}
	modified = 0;
	if (log_compare(&LSN(pagep), &argp->lsn_next) == 0 && redo) {
		/* Redo the relink. */
		pagep->prev_pgno = argp->prev;

		pagep->lsn = *lsnp;
		modified = 1;
	} else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) {
		/* Undo the relink. */
		pagep->prev_pgno = argp->pgno;

		pagep->lsn = argp->lsn_next;
		modified = 1;
	}
	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
		(void)__db_panic(file_dbp);
		goto out;
	}

prev:	if ((ret = memp_fget(mpf, &argp->prev, 0, &pagep)) != 0) {
		if (redo) {
			(void)__db_pgerr(file_dbp, argp->prev);
			goto out;
		}
		goto done;
	}
	modified = 0;
	if (log_compare(&LSN(pagep), &argp->lsn_prev) == 0 && redo) {
		/* Redo the relink. */
		pagep->next_pgno = argp->next;

		pagep->lsn = *lsnp;
		modified = 1;
	} else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) {
		/* Undo the relink. */
		pagep->next_pgno = argp->pgno;

		pagep->lsn = argp->lsn_prev;
		modified = 1;
	}
	if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
		(void) __db_panic(file_dbp);
		goto out;
	}

done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	REC_CLOSE;
}
/*
 * __db_addpage_recover --
 *	Recovery function for the record logged when a new page is linked
 *	after an existing one: fixes the forward link on the old page and
 *	the backward link on the new page.
 *
 *	*lsnp is set to argp->prev_lsn whenever ret is 0 on the way out.
 *	The return value is produced by REC_CLOSE (defined elsewhere;
 *	presumably it returns ret).
 *
 * PUBLIC: int __db_addpage_recover
 * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__db_addpage_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__db_addpage_args *argp;
	DB *file_dbp, *mdbp;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	int change, cmp_n, cmp_p, ret;

	REC_PRINT(__db_addpage_print);
	REC_INTRO(__db_addpage_read);

	/*
	 * Two pages are involved: the existing page and the new page that
	 * duplicates will be added to.  The existing page comes first.
	 */
	ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
	if (ret != 0)
		goto out;

	change = 0;
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->lsn);
	if (redo) {
		if (cmp_p == 0) {
			NEXT_PGNO(pagep) = argp->nextpgno;
			LSN(pagep) = *lsnp;
			change = DB_MPOOL_DIRTY;
		}
	} else if (cmp_n == 0) {
		NEXT_PGNO(pagep) = PGNO_INVALID;
		LSN(pagep) = argp->lsn;
		change = DB_MPOOL_DIRTY;
	}
	ret = memp_fput(mpf, pagep, change);
	if (ret != 0)
		goto out;

	/* Now the new page. */
	ret = memp_fget(mpf, &argp->nextpgno, 0, &pagep);
	if (ret != 0) {
		if (!redo) {
			/*
			 * Undo of a page that does not exist: the same as a
			 * page LSN of 0, so nothing needs undoing and there
			 * is no reason to create the page.
			 */
			ret = 0;
			goto out;
		}

		/* Redo needs the page to exist; create it. */
		ret = memp_fget(mpf,
		    &argp->nextpgno, DB_MPOOL_CREATE, &pagep);
		if (ret != 0)
			goto out;
	}

	change = 0;
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
	if (redo) {
		if (cmp_p == 0) {
			PREV_PGNO(pagep) = argp->pgno;
			LSN(pagep) = *lsnp;
			change = DB_MPOOL_DIRTY;
		}
	} else if (cmp_n == 0) {
		PREV_PGNO(pagep) = PGNO_INVALID;
		LSN(pagep) = argp->nextlsn;
		change = DB_MPOOL_DIRTY;
	}
	ret = memp_fput(mpf, pagep, change);

out:	if (ret == 0)
		*lsnp = argp->prev_lsn;
	REC_CLOSE;
}
/*
 * __db_debug_recover --
 *	Recovery function for debug records.  No page is touched; *lsnp is
 *	simply moved back to the previous record's LSN and ret set to 0.
 *
 * PUBLIC: int __db_debug_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__db_debug_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__db_debug_args *argp;
	int ret;

	REC_PRINT(__db_debug_print);
	REC_NOOP_INTRO(__db_debug_read);

	/* Nothing to redo or undo. */
	ret = 0;
	*lsnp = argp->prev_lsn;

	REC_NOOP_CLOSE;
}
/*
 * __db_noop_recover --
 *	Recovery function for noop records.  No page is touched; *lsnp is
 *	simply moved back to the previous record's LSN and ret set to 0.
 *
 * PUBLIC: int __db_noop_recover
 * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__db_noop_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__db_noop_args *argp;
	int ret;

	REC_PRINT(__db_noop_print);
	REC_NOOP_INTRO(__db_noop_read);

	/* Nothing to redo or undo. */
	ret = 0;
	*lsnp = argp->prev_lsn;

	REC_NOOP_CLOSE;
}

149
db2/db/db_ret.c Normal file
View File

@ -0,0 +1,149 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_ret.c 10.5 (Sleepycat) 7/12/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
#include "hash.h"
#include "db_am.h"
/*
* __db_ret --
* Build return DBT.
*
* PUBLIC: int __db_ret __P((DB *,
* PUBLIC: PAGE *, u_int32_t, DBT *, void **, u_int32_t *));
*/
int
__db_ret(dbp, h, indx, dbt, memp, memsize)
DB *dbp;
PAGE *h;
u_int32_t indx;
DBT *dbt;
void **memp;
u_int32_t *memsize;
{
BKEYDATA *bk;
HOFFPAGE ho;
BOVERFLOW *bo;
u_int32_t len;
void *data, *hk;
switch (TYPE(h)) {
case P_HASH:
hk = P_ENTRY(h, indx);
if (((HKEYDATA *)hk)->type == H_OFFPAGE) {
memcpy(&ho, hk, sizeof(HOFFPAGE));
return (__db_goff(dbp, dbt,
ho.tlen, ho.pgno, memp, memsize));
}
len = LEN_HKEYDATA(h, dbp->pgsize, indx);
data = ((HKEYDATA *)hk)->data;
break;
case P_DUPLICATE:
case P_LBTREE:
case P_LRECNO:
bk = GET_BKEYDATA(h, indx);
if (bk->type == B_OVERFLOW) {
bo = (BOVERFLOW *)bk;
return (__db_goff(dbp, dbt,
bo->tlen, bo->pgno, memp, memsize));
}
len = bk->len;
data = bk->data;
break;
default:
return (__db_pgfmt(dbp, h->pgno));
}
return (__db_retcopy(dbt, data, len, memp, memsize,
F_ISSET(dbt, DB_DBT_INTERNAL) ? NULL : dbp->db_malloc));
}
/*
 * __db_retcopy --
 *	Copy the returned data into the user's DBT, handling special flags.
 *
 *	dbt		DBT to fill in; its flags select where the bytes go:
 *			DB_DBT_PARTIAL	return only dlen bytes starting at doff;
 *			DB_DBT_MALLOC	allocate a fresh buffer, using db_malloc
 *					when non-NULL and malloc otherwise;
 *			DB_DBT_USERMEM	copy into dbt->data, which holds
 *					dbt->ulen bytes;
 *			none of these	use the reusable buffer *memp, of size
 *					*memsize, growing it as required.
 *	data, len	the record to return.
 *
 *	dbt->size is always set to the number of bytes that would be
 *	returned, even when ENOMEM is returned for a short user buffer.
 *
 *	Returns 0 on success, ENOMEM if memory could not be obtained or the
 *	user's buffer is too small, EINVAL if the reusable buffer is needed
 *	but memp or memsize is NULL.  If growing *memp fails, the old buffer
 *	is freed and *memp / *memsize are reset to NULL / 0.
 *
 * PUBLIC: int __db_retcopy __P((DBT *,
 * PUBLIC:    void *, u_int32_t, void **, u_int32_t *, void *(*)(size_t)));
 */
int
__db_retcopy(dbt, data, len, memp, memsize, db_malloc)
	DBT *dbt;
	void *data;
	u_int32_t len;
	void **memp;
	u_int32_t *memsize;
	void *(*db_malloc) __P((size_t));
{
	void *newp;

	/* If returning a partial record, reset the length. */
	if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
		if (len > dbt->doff) {
			/*
			 * Advance only when the offset lies inside the
			 * record, so the pointer is never moved past the
			 * end of the data.
			 */
			data = (u_int8_t *)data + dbt->doff;
			len -= dbt->doff;
			if (len > dbt->dlen)
				len = dbt->dlen;
		} else
			len = 0;
	}

	/*
	 * Return the length of the returned record in the DBT size field.
	 * This satisfies the requirement that if we're using user memory
	 * and insufficient memory was provided, return the amount necessary
	 * in the size field.
	 */
	dbt->size = len;

	/*
	 * Allocate any necessary memory.
	 *
	 * XXX: Never allocate 0 bytes.
	 */
	if (F_ISSET(dbt, DB_DBT_MALLOC)) {
		dbt->data = db_malloc == NULL ?
		    (void *)malloc(len + 1) :
		    (void *)db_malloc(len + 1);
		if (dbt->data == NULL)
			return (ENOMEM);
	} else if (F_ISSET(dbt, DB_DBT_USERMEM)) {
		if (dbt->ulen < len)
			return (ENOMEM);
	} else if (memp == NULL || memsize == NULL) {
		return (EINVAL);
	} else {
		if (*memsize == 0 || *memsize < len) {
			/*
			 * Grow through a temporary: assigning realloc's
			 * result straight to *memp would lose, and so leak,
			 * the old buffer when realloc fails.
			 */
			newp = *memp == NULL ?
			    (void *)malloc(len + 1) :
			    (void *)realloc(*memp, len + 1);
			if (newp == NULL) {
				free(*memp);	/* No-op when *memp is NULL. */
				*memp = NULL;
				*memsize = 0;
				return (ENOMEM);
			}
			*memp = newp;
			*memsize = len + 1;
		}
		dbt->data = *memp;
	}

	memcpy(dbt->data, data, len);
	return (0);
}

125
db2/db/db_thread.c Normal file
View File

@ -0,0 +1,125 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)db_thread.c 8.11 (Sleepycat) 8/18/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "shqueue.h"
#include "db_am.h"
static int __db_getlockid __P((DB *, DB *));
/*
 * __db_gethandle --
 *	Called by db access method routines when the DB_THREAD flag is set.
 *	Hands back a DB handle for the caller's use: one taken from the
 *	master handle's list of idle handles if there is one, otherwise a
 *	newly allocated copy of the master set up by am_func.
 *
 *	The master's mutex is held while the list is examined.  On success
 *	the handle is stored in *dbpp and 0 is returned, unless releasing
 *	the mutex fails, in which case that error is returned.  On failure
 *	*dbpp is left untouched.
 *
 * PUBLIC: int __db_gethandle __P((DB *, int (*)(DB *, DB *), DB **));
 */
int
__db_gethandle(dbp, am_func, dbpp)
	DB *dbp, **dbpp;
	int (*am_func) __P((DB *, DB *));
{
	DB *handle;
	int ret, unlock_ret;

	ret = __db_mutex_lock((db_mutex_t *)dbp->mutex, -1,
	    dbp->dbenv == NULL ? NULL : dbp->dbenv->db_yield);
	if (ret != 0)
		return (ret);

	/* Reuse an idle handle when one is available. */
	handle = LIST_FIRST(&dbp->handleq);
	if (handle != NULL) {
		LIST_REMOVE(handle, links);
		*dbpp = handle;
		goto unlock;
	}

	/* Otherwise build a new one, starting from a copy of the master. */
	handle = (DB *)malloc(sizeof(*dbp));
	if (handle == NULL) {
		ret = ENOMEM;
		goto unlock;
	}
	memcpy(handle, dbp, sizeof(*dbp));
	handle->internal = NULL;
	TAILQ_INIT(&handle->curs_queue);

	/*
	 * Set the locker, the lock structure and the lock DBT, then let the
	 * access method do its own part of the duplication.
	 */
	if ((ret = __db_getlockid(dbp, handle)) != 0 ||
	    (ret = am_func(dbp, handle)) != 0) {
		FREE(handle, sizeof(*handle));
		goto unlock;
	}
	*dbpp = handle;

unlock:	unlock_ret = __db_mutex_unlock((db_mutex_t *)dbp->mutex, -1);
	if (unlock_ret != 0 && ret == 0)
		ret = unlock_ret;
	return (ret);
}
/*
 * __db_puthandle --
 *	Return a DB handle to the pool for later use: the handle is inserted
 *	at the head of its master's handle queue while the master's mutex is
 *	held.
 *
 *	Returns the mutex lock error if locking fails (the handle is then not
 *	queued), otherwise the result of unlocking the mutex.
 *
 * PUBLIC: int __db_puthandle __P((DB *));
 */
int
__db_puthandle(dbp)
	DB *dbp;
{
	DB *owner;
	int ret;

	owner = dbp->master;

	ret = __db_mutex_lock((db_mutex_t *)owner->mutex, -1,
	    dbp->dbenv == NULL ? NULL : dbp->dbenv->db_yield);
	if (ret == 0) {
		LIST_INSERT_HEAD(&owner->handleq, dbp, links);
		ret = __db_mutex_unlock((db_mutex_t *)owner->mutex, -1);
	}
	return (ret);
}
/*
 * __db_getlockid --
 *	Give a newly created handle its own locker ID and copy the file lock
 *	information from the old DB into it.  Nothing is done unless the old
 *	handle has DB_AM_LOCKING set.
 *
 *	Returns 0 on success or the error returned by lock_id().
 */
static int
__db_getlockid(dbp, new_dbp)
	DB *dbp, *new_dbp;
{
	int ret;

	/* Handles that do no locking need no lock information. */
	if (!F_ISSET(dbp, DB_AM_LOCKING))
		return (0);

	ret = lock_id(dbp->dbenv->lk_info, &new_dbp->locker);
	if (ret != 0)
		return (ret);

	/* Same file as the original; the lock DBT describes the new lock. */
	memcpy(new_dbp->lock.fileid, dbp->lock.fileid, DB_FILE_ID_LEN);
	new_dbp->lock_dbt.data = &new_dbp->lock;
	new_dbp->lock_dbt.size = sizeof(new_dbp->lock);
	return (0);
}

472
db2/db185/db185.c Normal file
View File

@ -0,0 +1,472 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1997\n\
Sleepycat Software Inc. All rights reserved.\n";
static const char sccsid[] = "@(#)db185.c 8.13 (Sleepycat) 8/24/97";
#endif
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "db185_int.h"
#include "common_ext.h"
static int db185_close __P((DB185 *));
static int db185_del __P((const DB185 *, const DBT185 *, u_int));
static int db185_fd __P((const DB185 *));
static int db185_get __P((const DB185 *, const DBT185 *, DBT185 *, u_int));
static int db185_put __P((const DB185 *, DBT185 *, const DBT185 *, u_int));
static int db185_seq __P((const DB185 *, DBT185 *, DBT185 *, u_int));
static int db185_sync __P((const DB185 *, u_int));
DB185 *
__dbopen(file, oflags, mode, type, openinfo)
const char *file;
int oflags, mode;
DBTYPE type;
const void *openinfo;
{
const BTREEINFO *bi;
const HASHINFO *hi;
const RECNOINFO *ri;
DB *dbp;
DB185 *db185p;
DB_INFO dbinfo, *dbinfop;
int s_errno;
if ((db185p = (DB185 *)calloc(1, sizeof(DB185))) == NULL)
return (NULL);
dbinfop = NULL;
memset(&dbinfo, 0, sizeof(dbinfo));
/*
* !!!
* The DBTYPE enum wasn't initialized in DB 185, so it's off-by-one
* from DB 2.0.
*/
switch (type) {
case 0: /* DB_BTREE */
type = DB_BTREE;
if ((bi = openinfo) != NULL) {
dbinfop = &dbinfo;
if (bi->flags & ~R_DUP)
goto einval;
if (bi->flags & R_DUP)
dbinfop->flags |= DB_DUP;
dbinfop->db_cachesize = bi->cachesize;
dbinfop->bt_maxkey = bi->maxkeypage;
dbinfop->bt_minkey = bi->minkeypage;
dbinfop->db_pagesize = bi->psize;
/*
* !!!
* Comparisons and prefix calls work because the DBT
* structures in 1.85 and 2.0 have the same initial
* fields.
*/
dbinfop->bt_compare = bi->compare;
dbinfop->bt_prefix = bi->prefix;
dbinfop->db_lorder = bi->lorder;
}
break;
case 1: /* DB_HASH */
type = DB_HASH;
if ((hi = openinfo) != NULL) {
dbinfop = &dbinfo;
dbinfop->db_pagesize = hi->bsize;
dbinfop->h_ffactor = hi->ffactor;
dbinfop->h_nelem = hi->nelem;
dbinfop->db_cachesize = hi->cachesize;
dbinfop->h_hash = hi->hash;
dbinfop->db_lorder = hi->lorder;
}
break;
case 2: /* DB_RECNO */
type = DB_RECNO;
dbinfop = &dbinfo;
/* DB 1.85 did renumbering by default. */
dbinfop->flags |= DB_RENUMBER;
/*
* !!!
* The file name given to DB 1.85 recno is the name of the DB
* 2.0 backing file. If the file doesn't exist, create it if
* the user has the O_CREAT flag set, DB 1.85 did it for you,
* and DB 2.0 doesn't.
*
* !!!
* Note, the file name in DB 1.85 was a const -- we don't do
* that in DB 2.0, so do that cast.
*/
if (file != NULL) {
if (oflags & O_CREAT && __db_exists(file, NULL) != 0)
(void)close(open(file, oflags, mode));
dbinfop->re_source = (char *)file;
file = NULL;
}
if ((ri = openinfo) != NULL) {
/*
* !!!
* We can't support the bfname field.
*/
#define BFMSG "DB: DB 1.85's recno bfname field is not supported.\n"
if (ri->bfname != NULL) {
(void)write(2, BFMSG, sizeof(BFMSG) - 1);
goto einval;
}
if (ri->flags & ~(R_FIXEDLEN | R_NOKEY | R_SNAPSHOT))
goto einval;
if (ri->flags & R_FIXEDLEN) {
dbinfop->flags |= DB_FIXEDLEN;
if (ri->bval != 0) {
dbinfop->flags |= DB_PAD;
dbinfop->re_pad = ri->bval;
}
} else
if (ri->bval != 0) {
dbinfop->flags |= DB_DELIMITER;
dbinfop->re_delim = ri->bval;
}
/*
* !!!
* We ignore the R_NOKEY flag, but that's okay, it was
* only an optimization that was never implemented.
*/
if (ri->flags & R_SNAPSHOT)
dbinfop->flags |= DB_SNAPSHOT;
dbinfop->db_cachesize = ri->cachesize;
dbinfop->db_pagesize = ri->psize;
dbinfop->db_lorder = ri->lorder;
dbinfop->re_len = ri->reclen;
}
break;
default:
goto einval;
}
db185p->close = db185_close;
db185p->del = db185_del;
db185p->fd = db185_fd;
db185p->get = db185_get;
db185p->put = db185_put;
db185p->seq = db185_seq;
db185p->sync = db185_sync;
/*
* !!!
* Store the returned pointer to the real DB 2.0 structure in the
* internal pointer. Ugly, but we're not going for pretty, here.
*/
if ((errno = db_open(file,
type, __db_oflags(oflags), mode, NULL, dbinfop, &dbp)) != 0) {
free(db185p);
return (NULL);
}
/* Create the cursor used for sequential ops. */
if ((errno = dbp->cursor(dbp, NULL, &((DB185 *)db185p)->dbc)) != 0) {
s_errno = errno;
(void)dbp->close(dbp, 0);
free(db185p);
errno = s_errno;
return (NULL);
}
db185p->internal = dbp;
return (db185p);
einval: free(db185p);
errno = EINVAL;
return (NULL);
}
weak_alias (__dbopen, dbopen)
/*
 * db185_close --
 *	DB 1.85 close method: close the underlying DB 2.0 handle and release
 *	the DB185 wrapper.  The wrapper is freed whether or not the close
 *	succeeds.
 *
 *	Returns 0 on success; otherwise -1, with errno set to the value
 *	returned by the DB 2.0 close method.
 */
static int
db185_close(db185p)
	DB185 *db185p;
{
	DB *dbp;
	int ret;

	dbp = (DB *)db185p->internal;

	/*
	 * Keep the close result in a local and publish it in errno only
	 * after free(), so that neither the reported error nor the return
	 * value can be disturbed by free() modifying errno.
	 */
	ret = dbp->close(dbp, 0);
	free(db185p);
	errno = ret;

	return (ret == 0 ? 0 : -1);
}
/*
 * db185_del --
 *	DB 1.85 del method.  With R_CURSOR the record referenced by the
 *	sequential cursor is deleted; with no flags the record stored under
 *	key185 is deleted.
 *
 *	Returns 0 on success, 1 if the DB 2.0 call reports DB_NOTFOUND, and
 *	-1 otherwise.  errno is set to the DB 2.0 return value, or to EINVAL
 *	if any flag other than R_CURSOR is given.
 */
static int
db185_del(db185p, key185, flags)
	const DB185 *db185p;
	const DBT185 *key185;
	u_int flags;
{
	DB *dbp;
	DBT key;
	int ret;

	dbp = (DB *)db185p->internal;

	/* Wrap the 1.85 key in a DB 2.0 DBT. */
	memset(&key, 0, sizeof(key));
	key.data = key185->data;
	key.size = key185->size;

	if ((flags & ~R_CURSOR) != 0) {
		errno = EINVAL;
		return (-1);
	}

	if ((flags & R_CURSOR) != 0)
		ret = db185p->dbc->c_del(db185p->dbc, 0);
	else
		ret = dbp->del(dbp, NULL, &key, 0);
	errno = ret;

	if (ret == 0)
		return (0);
	return (ret == DB_NOTFOUND ? 1 : -1);
}
/*
 * db185_fd --
 *	DB 1.85 fd method: ask the underlying DB 2.0 handle for its file
 *	descriptor.
 *
 *	Returns the descriptor on success; otherwise -1.  errno is set to
 *	the DB 2.0 return value in both cases.
 */
static int
db185_fd(db185p)
	const DB185 *db185p;
{
	DB *dbp;
	int fd, ret;

	dbp = (DB *)db185p->internal;

	ret = dbp->fd(dbp, &fd);
	errno = ret;
	if (ret != 0)
		return (-1);
	return (fd);
}
/*
 * db185_get --
 *	DB 1.85 get method: look up key185 in the underlying DB 2.0 database
 *	and, on success, hand the data/size pair found back through data185.
 *
 *	Returns 0 on success, 1 if the DB 2.0 call reports DB_NOTFOUND, and
 *	-1 otherwise.  errno is set to the DB 2.0 return value, or to EINVAL
 *	if any flag is given.
 */
static int
db185_get(db185p, key185, data185, flags)
	const DB185 *db185p;
	const DBT185 *key185;
	DBT185 *data185;
	u_int flags;
{
	DB *dbp;
	DBT key, data;
	int ret;

	dbp = (DB *)db185p->internal;

	/* Wrap the 1.85 key and data in DB 2.0 DBTs. */
	memset(&key, 0, sizeof(key));
	key.data = key185->data;
	key.size = key185->size;
	memset(&data, 0, sizeof(data));
	data.data = data185->data;
	data.size = data185->size;

	/* No get flags are accepted. */
	if (flags != 0) {
		errno = EINVAL;
		return (-1);
	}

	ret = dbp->get(dbp, NULL, &key, &data, 0);
	errno = ret;
	if (ret == DB_NOTFOUND)
		return (1);
	if (ret != 0)
		return (-1);

	data185->data = data.data;
	data185->size = data.size;
	return (0);
}
static int
db185_put(db185p, key185, data185, flags)
const DB185 *db185p;
DBT185 *key185;
const DBT185 *data185;
u_int flags;
{
DB *dbp;
DBC *dbcp_put;
DBT key, data;
int s_errno;
dbp = (DB *)db185p->internal;
memset(&key, 0, sizeof(key));
key.data = key185->data;
key.size = key185->size;
memset(&data, 0, sizeof(data));
data.data = data185->data;
data.size = data185->size;
switch (flags) {
case 0:
errno = dbp->put(dbp, NULL, &key, &data, 0);
break;
case R_CURSOR:
errno =
db185p->dbc->c_put(db185p->dbc, &key, &data, DB_CURRENT);
break;
case R_IAFTER:
case R_IBEFORE:
if (dbp->type != DB_RECNO)
goto einval;
if ((errno = dbp->cursor(dbp, NULL, &dbcp_put)) != 0)
return (-1);
if ((errno =
dbcp_put->c_get(dbcp_put, &key, &data, DB_SET)) != 0) {
s_errno = errno;
(void)dbcp_put->c_close(dbcp_put);
errno = s_errno;
return (-1);
}
memset(&data, 0, sizeof(data));
data.data = data185->data;
data.size = data185->size;
errno = dbcp_put->c_put(dbcp_put,
&key, &data, flags == R_IAFTER ? DB_AFTER : DB_BEFORE);
s_errno = errno;
(void)dbcp_put->c_close(dbcp_put);
errno = s_errno;
break;
case R_NOOVERWRITE:
errno = dbp->put(dbp, NULL, &key, &data, DB_NOOVERWRITE);
break;
case R_SETCURSOR:
if (dbp->type != DB_BTREE && dbp->type != DB_RECNO)
goto einval;
if ((errno = dbp->put(dbp, NULL, &key, &data, 0)) != 0)
break;
errno =
db185p->dbc->c_get(db185p->dbc, &key, &data, DB_SET_RANGE);
break;
default:
goto einval;
}
switch (errno) {
case 0:
key185->data = key.data;
key185->size = key.size;
return (0);
case DB_KEYEXIST:
return (1);
}
return (-1);
einval: errno = EINVAL;
return (-1);
}
/*
 * db185_seq --
 *	DB 1.85 seq method: translate the 1.85 positioning flag into the
 *	DB 2.0 cursor operation and run it on the handle's sequential cursor.
 *	R_LAST and R_PREV are accepted only for btree and recno databases.
 *
 *	Returns 0 on success (key185 and data185 are updated with the record
 *	found), 1 if the cursor reports DB_NOTFOUND, and -1 otherwise.  errno
 *	is set to the DB 2.0 return value, or to EINVAL for an unknown or
 *	inapplicable flag.
 */
static int
db185_seq(db185p, key185, data185, flags)
	const DB185 *db185p;
	DBT185 *key185, *data185;
	u_int flags;
{
	DB *dbp;
	DBT key, data;
	u_int op;
	int ret;

	dbp = (DB *)db185p->internal;

	/* Wrap the 1.85 key and data in DB 2.0 DBTs. */
	memset(&key, 0, sizeof(key));
	key.data = key185->data;
	key.size = key185->size;
	memset(&data, 0, sizeof(data));
	data.data = data185->data;
	data.size = data185->size;

	switch (flags) {
	case R_CURSOR:
		op = DB_SET_RANGE;
		break;
	case R_FIRST:
		op = DB_FIRST;
		break;
	case R_NEXT:
		op = DB_NEXT;
		break;
	case R_LAST:
	case R_PREV:
		/* Backward movement needs an ordered access method. */
		if (dbp->type != DB_BTREE && dbp->type != DB_RECNO) {
			errno = EINVAL;
			return (-1);
		}
		op = flags == R_LAST ? DB_LAST : DB_PREV;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	ret = db185p->dbc->c_get(db185p->dbc, &key, &data, op);
	errno = ret;
	if (ret == DB_NOTFOUND)
		return (1);
	if (ret != 0)
		return (-1);

	key185->data = key.data;
	key185->size = key.size;
	data185->data = data.data;
	data185->size = data.size;
	return (0);
}
/*
 * db185_sync --
 *	DB 1.85 sync method: flush the underlying DB 2.0 database.  Only a
 *	flags value of 0 is accepted; R_RECNOSYNC additionally writes a
 *	diagnostic to file descriptor 2 before being rejected.
 *
 *	Returns 0 on success and -1 otherwise.  errno is set to the DB 2.0
 *	return value, or to EINVAL for any non-zero flags value.
 */
static int
db185_sync(db185p, flags)
	const DB185 *db185p;
	u_int flags;
{
	DB *dbp;
	int ret;

	dbp = (DB *)db185p->internal;

	if (flags != 0) {
		/*
		 * !!!
		 * We can't support the R_RECNOSYNC flag.
		 */
#define	RSMSG	"DB: DB 1.85's R_RECNOSYNC sync flag is not supported.\n"
		if (flags == R_RECNOSYNC)
			(void)write(2, RSMSG, sizeof(RSMSG) - 1);
		errno = EINVAL;
		return (-1);
	}

	ret = dbp->sync(dbp, 0);
	errno = ret;
	return (ret == 0 ? 0 : -1);
}

137
db2/db185/db185_int.h Normal file
View File

@ -0,0 +1,137 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)db185_int.h 8.4 (Sleepycat) 7/27/97
*/
#ifndef _DB185_H_
#define _DB185_H_
/*
 * Routine flags: the values DB 1.85 applications pass as the flags argument
 * of the del, put, seq and sync methods.  The comment on each flag names the
 * methods (and, in parentheses, the access methods) that accept it.
 */
#define R_CURSOR 1 /* del, put, seq */
#define __R_UNUSED 2 /* UNUSED */
#define R_FIRST 3 /* seq */
#define R_IAFTER 4 /* put (RECNO) */
#define R_IBEFORE 5 /* put (RECNO) */
#define R_LAST 6 /* seq (BTREE, RECNO) */
#define R_NEXT 7 /* seq */
#define R_NOOVERWRITE 8 /* put */
#define R_PREV 9 /* seq (BTREE, RECNO) */
#define R_SETCURSOR 10 /* put (BTREE, RECNO) */
#define R_RECNOSYNC 11 /* sync (RECNO); rejected with EINVAL by db185_sync */
/*
 * DB 1.85 key/data pair.  The compatibility routines in db185.c copy these
 * two members to and from the data and size members of a DB 2.0 DBT.
 */
typedef struct {
void *data; /* data */
size_t size; /* data length */
} DBT185;
/*
 * Access method description structure: the handle handed to DB 1.85
 * applications.  It holds the 1.85 method table, the underlying DB 2.0
 * handle (internal) and the cursor used for sequential operations (dbc).
 *
 * NOTE(review): __dbopen in db185.c does not appear to assign the type
 * member, so it keeps the zero value from calloc -- confirm whether any
 * caller relies on it.
 */
typedef struct __db185 {
DBTYPE type; /* Underlying db type. */
int (*close) __P((struct __db185 *));
int (*del) __P((const struct __db185 *, const DBT185 *, u_int));
int (*get)
__P((const struct __db185 *, const DBT185 *, DBT185 *, u_int));
int (*put)
__P((const struct __db185 *, DBT185 *, const DBT185 *, u_int));
int (*seq)
__P((const struct __db185 *, DBT185 *, DBT185 *, u_int));
int (*sync) __P((const struct __db185 *, u_int));
void *internal; /* Access method private. */
int (*fd) __P((const struct __db185 *));
/*
 * !!!
 * Added to the end of the DB 1.85 DB structure, it's needed to
 * hold the DB 2.0 cursor used for DB 1.85 sequential operations.
 */
DBC *dbc; /* DB 1.85 sequential cursor. */
} DB185;
/*
 * Structure used to pass parameters to the btree routines.  __dbopen copies
 * the members into the DB 2.0 DB_INFO; any flag other than R_DUP is rejected
 * with EINVAL.  The compare and prefix callbacks are declared here with the
 * DB 2.0 DBT type.
 */
typedef struct {
#define R_DUP 0x01 /* duplicate keys */
u_long flags;
u_int cachesize; /* bytes to cache */
int maxkeypage; /* maximum keys per page */
int minkeypage; /* minimum keys per page */
u_int psize; /* page size */
int (*compare) /* comparison function */
__P((const DBT *, const DBT *));
size_t (*prefix) /* prefix function */
__P((const DBT *, const DBT *));
int lorder; /* byte order */
} BTREEINFO;
/*
 * Structure used to pass parameters to the hashing routines.  __dbopen
 * copies every member into the corresponding DB 2.0 DB_INFO field.
 */
typedef struct {
u_int bsize; /* bucket size */
u_int ffactor; /* fill factor */
u_int nelem; /* number of elements */
u_int cachesize; /* bytes to cache */
u_int32_t /* hash function */
(*hash) __P((const void *, size_t));
int lorder; /* byte order */
} HASHINFO;
/*
 * Structure used to pass parameters to the record routines.  __dbopen
 * rejects unknown flags and a non-NULL bfname with EINVAL.  bval is used as
 * the pad byte when R_FIXEDLEN is set and as the record delimiter otherwise;
 * a bval of 0 leaves the DB 2.0 default in place.
 */
typedef struct {
#define R_FIXEDLEN 0x01 /* fixed-length records */
#define R_NOKEY 0x02 /* key not required */
#define R_SNAPSHOT 0x04 /* snapshot the input */
u_long flags;
u_int cachesize; /* bytes to cache */
u_int psize; /* page size */
int lorder; /* byte order */
size_t reclen; /* record length (fixed-length records) */
u_char bval; /* delimiting byte (variable-length records) */
char *bfname; /* btree file name (not supported) */
} RECNOINFO;
/*
 * Entry point of the DB 1.85 compatibility interface, declared with C
 * linkage when this header is compiled as C++.  (A linkage specification
 * block is closed by a plain brace; no semicolon follows it.)
 */
#if defined(__cplusplus)
extern "C" {
#endif
DB185 *dbopen __P((const char *, int, int, DBTYPE, const void *));
#if defined(__cplusplus)
}
#endif
#endif /* !_DB185_H_ */

171
db2/db_185.h Normal file
View File

@ -0,0 +1,171 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)db_185.h.src 8.3 (Sleepycat) 7/27/97
*/
#ifndef _DB_185_H_
#define _DB_185_H_
#include <sys/types.h>
#include <limits.h>
/*
 * XXX
 * Handle function prototypes and the keyword "const". This steps on name
 * space that DB doesn't control, but all of the other solutions are worse.
 *
 * With an ANSI C or C++ compiler __P(protos) expands to the prototype
 * itself; otherwise it expands to an empty parameter list and "const" is
 * defined away.
 */
#undef __P
#if defined(__STDC__) || defined(__cplusplus)
#define __P(protos) protos /* ANSI C prototypes */
#else
#define const
#define __P(protos) () /* K&R C preprocessor */
#endif
/* Return values of the DB 1.85 methods. */
#define RET_ERROR -1 /* Return values. */
#define RET_SUCCESS 0
#define RET_SPECIAL 1
/*
 * Marks the fixed-width integral types as defined; no typedefs are supplied
 * here, so u_int16_t and u_int32_t below must come from the system headers.
 */
#ifndef __BIT_TYPES_DEFINED__
#define __BIT_TYPES_DEFINED__
#endif
/* Page number, in-page offset and record number types with their limits. */
#define MAX_PAGE_NUMBER 0xffffffff /* >= # of pages in a file */
typedef u_int32_t pgno_t;
#define MAX_PAGE_OFFSET 65535 /* >= # of bytes in a page */
typedef u_int16_t indx_t;
#define MAX_REC_NUMBER 0xffffffff /* >= # of records in a tree */
typedef u_int32_t recno_t;
/*
 * Key/data structure -- a Data-Base Thang.
 * A pointer to the bytes plus their length, used for both keys and data.
 */
typedef struct {
void *data; /* data */
size_t size; /* data length */
} DBT;
/*
 * Routine flags, passed as the flags argument of the del, put, seq and sync
 * methods.  The comment on each flag names the methods (and, in parentheses,
 * the access methods) that accept it.
 */
#define R_CURSOR 1 /* del, put, seq */
#define __R_UNUSED 2 /* UNUSED */
#define R_FIRST 3 /* seq */
#define R_IAFTER 4 /* put (RECNO) */
#define R_IBEFORE 5 /* put (RECNO) */
#define R_LAST 6 /* seq (BTREE, RECNO) */
#define R_NEXT 7 /* seq */
#define R_NOOVERWRITE 8 /* put */
#define R_PREV 9 /* seq (BTREE, RECNO) */
#define R_SETCURSOR 10 /* put (BTREE, RECNO) */
#define R_RECNOSYNC 11 /* sync (RECNO) */
/*
 * Access method types.  The enumerators are not given explicit values, so
 * they are numbered 0 (DB_BTREE), 1 (DB_HASH) and 2 (DB_RECNO).
 */
typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
/*
 * Access method description structure: the database handle returned by
 * dbopen().  All operations are invoked through its function pointers.
 */
typedef struct __db {
DBTYPE type; /* Underlying db type. */
int (*close) __P((struct __db *));
int (*del) __P((const struct __db *, const DBT *, u_int));
int (*get) __P((const struct __db *, const DBT *, DBT *, u_int));
int (*put) __P((const struct __db *, DBT *, const DBT *, u_int));
int (*seq) __P((const struct __db *, DBT *, DBT *, u_int));
int (*sync) __P((const struct __db *, u_int));
void *internal; /* Access method private. */
int (*fd) __P((const struct __db *));
} DB;
/* Btree magic number and version constants. */
#define BTREEMAGIC 0x053162
#define BTREEVERSION 3
/*
 * Structure used to pass parameters to the btree routines; passed to
 * dbopen() as the openinfo argument for btree databases.
 */
typedef struct {
#define R_DUP 0x01 /* duplicate keys */
u_long flags;
u_int cachesize; /* bytes to cache */
int maxkeypage; /* maximum keys per page */
int minkeypage; /* minimum keys per page */
u_int psize; /* page size */
int (*compare) /* comparison function */
__P((const DBT *, const DBT *));
size_t (*prefix) /* prefix function */
__P((const DBT *, const DBT *));
int lorder; /* byte order */
} BTREEINFO;
/* Hash magic number and version constants. */
#define HASHMAGIC 0x061561
#define HASHVERSION 2
/*
 * Structure used to pass parameters to the hashing routines; passed to
 * dbopen() as the openinfo argument for hash databases.
 */
typedef struct {
u_int bsize; /* bucket size */
u_int ffactor; /* fill factor */
u_int nelem; /* number of elements */
u_int cachesize; /* bytes to cache */
u_int32_t /* hash function */
(*hash) __P((const void *, size_t));
int lorder; /* byte order */
} HASHINFO;
/*
 * Structure used to pass parameters to the record routines; passed to
 * dbopen() as the openinfo argument for recno databases.
 */
typedef struct {
#define R_FIXEDLEN 0x01 /* fixed-length records */
#define R_NOKEY 0x02 /* key not required */
#define R_SNAPSHOT 0x04 /* snapshot the input */
u_long flags;
u_int cachesize; /* bytes to cache */
u_int psize; /* page size */
int lorder; /* byte order */
size_t reclen; /* record length (fixed-length records) */
u_char bval; /* delimiting byte (variable-length records) */
char *bfname; /* btree file name */
} RECNOINFO;
/*
 * Open routines of the DB 1.85 interface, declared with C linkage when this
 * header is compiled as C++.  (A linkage specification block is closed by a
 * plain brace; no semicolon follows it.)
 */
#if defined(__cplusplus)
extern "C" {
#endif
DB *__dbopen __P((const char *, int, int, DBTYPE, const void *));
DB *dbopen __P((const char *, int, int, DBTYPE, const void *));
#if defined(__cplusplus)
}
#endif
#endif /* !_DB_185_H_ */

332
db2/db_int.h Normal file
View File

@ -0,0 +1,332 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)db_int.h.src 10.28 (Sleepycat) 8/20/97
*/
#ifndef _DB_INTERNAL_H_
#define _DB_INTERNAL_H_
#include "db.h" /* Standard DB include file. */
#include "queue.h"
#include "os_ext.h"
/*******************************************************
 * General purpose constants and macros.
 *******************************************************/
#define UINT32_T_MAX 0xffffffff /* Maximum 32 bit unsigned. */
#define UINT16_T_MAX 0xffff /* Maximum 16 bit unsigned. */
#define DB_MIN_PGSIZE 0x000200 /* Minimum page size (512 bytes). */
#define DB_MAX_PGSIZE 0x010000 /* Maximum page size (64KB). */
#define DB_MINCACHE 10 /* Minimum cached pages */
/*
 * Aligning items to particular sizes or in pages or memory. ALIGNP is a
 * separate macro, as we've had to cast the pointer to different integral
 * types on different architectures.
 *
 * We cast pointers into unsigned longs when manipulating them because C89
 * guarantees that u_long is the largest available integral type and further,
 * to never generate overflows. However, neither C89 nor C9X requires that
 * any integer type be large enough to hold a pointer, although C9X created
 * the intptr_t type, which is guaranteed to hold a pointer but may or may
 * not exist. At some point in the future, we should test for intptr_t and
 * use it where available.
 *
 * ALIGN rounds value up to the next multiple of bound; the mask arithmetic
 * is only correct when bound is a power of two.
 *
 * NOTE(review): ALIGNP applies the cast to "value" without parenthesizing
 * it, so the cast binds to the first operand of a compound argument only.
 */
#undef ALIGNTYPE
#define ALIGNTYPE u_long
#undef ALIGNP
#define ALIGNP(value, bound) ALIGN((ALIGNTYPE)value, bound)
#undef ALIGN
#define ALIGN(value, bound) (((value) + (bound) - 1) & ~((bound) - 1))
/*
 * There are several on-page structures that are declared to have a number of
 * fields followed by a variable length array of items. The structure size
 * without including the variable length array or the address of the first of
 * those elements can be found using SSZ.
 *
 * This macro can also be used to find the offset of a structure element in a
 * structure. This is used in various places to copy structure elements from
 * unaligned memory references, e.g., pointers into a packed page.
 *
 * There are two versions because compilers object if you take the address of
 * an array.
 *
 * Both versions take the address of the member of a structure imagined at
 * address 0 and cast that address to int.
 */
#undef SSZ
#define SSZ(name, field) ((int)&(((name *)0)->field))
#undef SSZA
#define SSZA(name, field) ((int)&(((name *)0)->field[0]))
/*
 * Free and free-string macros that overwrite memory during debugging.
 *
 * FREE(p, len) releases p; when DEBUG is defined the first len bytes are
 * filled with 0xff beforehand.  FREES(p) does the same for a string, using
 * strlen(p) as the length (so the terminating NUL is not overwritten).
 *
 * NOTE(review): the macros expand to a brace block rather than a single
 * statement, so "if (x) FREE(p, n); else ..." does not parse.
 */
#ifdef DEBUG
#undef FREE
#define FREE(p, len) { \
memset(p, 0xff, len); \
free(p); \
}
#undef FREES
#define FREES(p) { \
FREE(p, strlen(p)); \
}
#else
#undef FREE
#define FREE(p, len) { \
free(p); \
}
#undef FREES
#define FREES(p) { \
free(p); \
}
#endif
/*
 * Structure used to print flag values: pairs a flag bit mask with the name
 * to display for it.
 */
typedef struct __fn {
u_int32_t mask; /* Flag value. */
const char *name; /* Flag name. */
} FN;
/*
 * Set, clear and test flags.
 *
 * The F_* macros operate on the "flags" member of the structure that p
 * points to; the LF_* macros operate on a variable named "flags" in the
 * calling scope.  F_ISSET and LF_ISSET yield the masked bits (non-zero if
 * any of them is set), not a 0/1 value.
 *
 * Every expansion is fully parenthesized so that the macros can be used
 * inside larger expressions.
 */
#define F_SET(p, f) ((p)->flags |= (f))
#define F_CLR(p, f) ((p)->flags &= ~(f))
#define F_ISSET(p, f) ((p)->flags & (f))
#define LF_SET(f) (flags |= (f))
#define LF_CLR(f) (flags &= ~(f))
#define LF_ISSET(f) (flags & (f))
/* Display separator string. */
#undef DB_LINE
#define DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
/*******************************************************
 * Files.
 *******************************************************/
/* Fallbacks for systems whose headers do not supply these definitions. */
#ifndef MAXPATHLEN /* Maximum path length. */
#ifdef PATH_MAX
#define MAXPATHLEN PATH_MAX
#else
#define MAXPATHLEN 1024
#endif
#endif
#define PATH_DOT "." /* Current working directory. */
#define PATH_SEPARATOR "/" /* Path separator character. */
#ifndef S_IRUSR /* UNIX specific file permissions. */
#define S_IRUSR 0000400 /* R for owner */
#define S_IWUSR 0000200 /* W for owner */
#define S_IRGRP 0000040 /* R for group */
#define S_IWGRP 0000020 /* W for group */
#define S_IROTH 0000004 /* R for other */
#define S_IWOTH 0000002 /* W for other */
#endif
/*
 * UNIX specific: directory test, for systems whose headers lack S_ISDIR.
 * The file type bits (mask 0170000) of the mode are compared against the
 * directory type value 0040000.  The argument is parenthesized so that
 * compound expressions are masked as a whole.
 */
#ifndef S_ISDIR
#define S_ISDIR(m) (((m) & 0170000) == 0040000)
#endif
/*******************************************************
 * Mutex support.
 *******************************************************/
/* Test-and-set lock resource. */
typedef unsigned char tsl_t;
/*
 * !!!
 * Various systems require different alignments for mutexes (the worst we've
 * seen so far is 16-bytes on some HP architectures). The mutex (tsl_t) must
 * be first in the db_mutex_t structure, which must itself be first in the
 * region. This ensures the alignment is as returned by mmap(2), which should
 * be sufficient. All other mutex users must ensure proper alignment locally.
 */
#define MUTEX_ALIGNMENT 1
/*
 * The offset of a mutex in memory: the byte distance from a to b.
 *
 * NOTE(review): the arguments are not parenthesized before being cast, so
 * pass simple pointer expressions.
 */
#define MUTEX_LOCK_OFFSET(a, b) ((off_t)((u_int8_t *)b - (u_int8_t *)a))
/*
 * The mutex structure.  With HAVE_SPINLOCKS it holds a test-and-set
 * resource (plus the holder's pid when DEBUG is defined); otherwise it holds
 * a backing file offset and the holder's pid.  MUTEX_STATISTICS adds two
 * counters.
 */
typedef struct _db_mutex_t {
#ifdef HAVE_SPINLOCKS
tsl_t tsl_resource; /* Resource test and set. */
#ifdef DEBUG
u_long pid; /* Lock holder: 0 or process pid. */
#endif
#else
off_t off; /* Backing file offset. */
u_long pid; /* Lock holder: 0 or process pid. */
#endif
#ifdef MUTEX_STATISTICS
u_long mutex_set_wait; /* Blocking mutex: required waiting. */
u_long mutex_set_nowait; /* Blocking mutex: without waiting. */
#endif
} db_mutex_t;
#include "mutex_ext.h"
/*******************************************************
 * Access methods.
 *******************************************************/
/*
 * Lock/unlock a DB thread.
 *
 * Both macros evaluate to 0 without touching the mutex when the handle does
 * not have DB_AM_THREAD set; otherwise they evaluate to the result of the
 * mutex call.  The lock passes the environment's db_yield function, or NULL
 * if the handle has no environment.
 */
#define DB_THREAD_LOCK(dbp) \
(F_ISSET(dbp, DB_AM_THREAD) ? \
__db_mutex_lock((db_mutex_t *)(dbp)->mutex, -1, \
(dbp)->dbenv == NULL ? NULL : (dbp)->dbenv->db_yield) : 0)
#define DB_THREAD_UNLOCK(dbp) \
(F_ISSET(dbp, DB_AM_THREAD) ? \
__db_mutex_unlock((db_mutex_t *)(dbp)->mutex, -1) : 0)
/*
 * Btree/recno local statistics structure: a set of 32-bit counters, one per
 * event described in the member comments.
 */
struct __db_bt_lstat; typedef struct __db_bt_lstat DB_BTREE_LSTAT;
struct __db_bt_lstat {
u_int32_t bt_freed; /* Pages freed for reuse. */
u_int32_t bt_pfxsaved; /* Bytes saved by prefix compression. */
u_int32_t bt_split; /* Total number of splits. */
u_int32_t bt_rootsplit; /* Root page splits. */
u_int32_t bt_fastsplit; /* Fast splits. */
u_int32_t bt_added; /* Items added. */
u_int32_t bt_deleted; /* Items deleted. */
u_int32_t bt_get; /* Items retrieved. */
u_int32_t bt_cache_hit; /* Hits in fast-insert code. */
u_int32_t bt_cache_miss; /* Misses in fast-insert code. */
};
/*******************************************************
 * Environment.
 *******************************************************/
/*
 * Type passed to __db_appname(), naming the kind of file being referred to.
 * DB_APP_NONE is explicitly 0; the others follow as 1, 2 and 3.
 */
typedef enum {
DB_APP_NONE=0, /* No type (region). */
DB_APP_DATA, /* Data file. */
DB_APP_LOG, /* Log file. */
DB_APP_TMP /* Temporary file. */
} APPNAME;
/*******************************************************
 * Regions.
 *******************************************************/
/*
 * The shared memory regions share an initial structure so that the general
 * region code can handle races between the region being deleted and other
 * processes waiting on the region mutex.
 *
 * !!!
 * Note, the mutex must be the first entry in the region; see the alignment
 * comment in the mutex support section.
 */
typedef struct _rlayout {
db_mutex_t lock; /* Region mutex. */
u_int32_t refcnt; /* Region reference count. */
size_t size; /* Region length. */
int majver; /* Major version number. */
int minver; /* Minor version number. */
int patch; /* Patch version number. */
#define DB_R_DELETED 0x01 /* Region was deleted. */
u_int32_t flags; /* DB_R_* flag bits. */
} RLAYOUT;
/*******************************************************
 * Mpool.
 *******************************************************/
/*
 * File types for DB access methods. Negative numbers are reserved to DB.
 */
#define DB_FTYPE_BTREE -1 /* Btree. */
#define DB_FTYPE_HASH -2 /* Hash. */
/*
 * Structure used as the DB pgin/pgout pgcookie: carries the page size and
 * whether byte swapping is needed.
 */
typedef struct __dbpginfo {
size_t db_pagesize; /* Underlying page size. */
int needswap; /* If swapping required. */
} DB_PGINFO;
/*******************************************************
 * Log.
 *******************************************************/
/*
 * Initialize an LSN to 'zero'.
 *
 * NOTE(review): expands to a brace block rather than a single statement, so
 * "if (x) ZERO_LSN(l); else ..." does not parse.
 */
#define ZERO_LSN(LSN) { \
(LSN).file = 0; \
(LSN).offset = 0; \
}
/*
 * Return 1 if LSN is a 'zero' lsn, otherwise return 0.
 * Only the file member is examined; the offset is not.
 */
#define IS_ZERO_LSN(LSN) ((LSN).file == 0)
/*
 * Test if we need to log a change: the handle has DB_AM_LOGGING set and
 * DB_AM_RECOVER clear.
 */
#define DB_LOGGING(dbp) \
(F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER))
#ifdef DEBUG
/*
 * Debugging macro to log operations.
 * If DEBUG_WOP is defined, log operations that modify the database.
 * If DEBUG_ROP is defined, log operations that read the database.
 *
 * D dbp
 * T txn
 * O operation (string)
 * K key
 * A data
 * F flags
 *
 * The operation string is logged including its terminating NUL, and the
 * result of __db_debug_log is discarded.  Nothing is logged unless
 * DB_LOGGING(D) is true.
 */
#define LOG_OP(D, T, O, K, A, F) { \
DB_LSN _lsn; \
DBT _op; \
if (DB_LOGGING((D))) { \
memset(&_op, 0, sizeof(_op)); \
_op.data = O; \
_op.size = strlen(O) + 1; \
(void)__db_debug_log((D)->dbenv->lg_info, \
T, &_lsn, 0, &_op, (D)->log_fileid, K, A, F); \
} \
}
/* Read operations are logged only when DEBUG_ROP is defined. */
#ifdef DEBUG_ROP
#define DEBUG_LREAD(D, T, O, K, A, F) LOG_OP(D, T, O, K, A, F)
#else
#define DEBUG_LREAD(D, T, O, K, A, F)
#endif
/* Write operations are logged only when DEBUG_WOP is defined. */
#ifdef DEBUG_WOP
#define DEBUG_LWRITE(D, T, O, K, A, F) LOG_OP(D, T, O, K, A, F)
#else
#define DEBUG_LWRITE(D, T, O, K, A, F)
#endif
#else
/* Without DEBUG both logging macros expand to nothing. */
#define DEBUG_LREAD(D, T, O, K, A, F)
#define DEBUG_LWRITE(D, T, O, K, A, F)
#endif /* DEBUG */
/*******************************************************
 * Transactions and recovery.
 *******************************************************/
/*
 * The locker ID space is divided between the transaction manager and the lock
 * manager. Locker IDs start at 0 and go to MAX_LOCKER_ID. Transaction IDs
 * start at MAX_LOCKER_ID + 1 and go up to MAX_TXNID.
 */
#define MAX_LOCKER_ID 0x0fffffff
#define MAX_TXNID 0xffffffff
/*
 * Out of band value for a lock. The locks are returned to callers as offsets
 * into the lock regions. Since the RLAYOUT structure begins all regions, an
 * offset of 0 is guaranteed not to be a valid lock.
 */
#define LOCK_INVALID 0
/* The structure allocated for every transaction. */
struct __db_txn {
DB_TXNMGR *mgrp; /* Pointer to transaction manager. */
DB_TXN *parent; /* Pointer to transaction's parent. */
DB_LSN last_lsn; /* Lsn of last log write. */
u_int32_t txnid; /* Unique transaction id. */
size_t off; /* Detail structure within region. */
TAILQ_ENTRY(__db_txn) links; /* Queue linkage. */
};
#endif /* !_DB_INTERNAL_H_ */

410
db2/dbm/dbm.c Normal file
View File

@ -0,0 +1,410 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993
* Margo Seltzer. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)dbm.c 10.5 (Sleepycat) 7/19/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/param.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#endif
#define DB_DBM_HSEARCH
#include "db_int.h"
#include "db_page.h"
#include "hash.h"
/*
*
* This package provides dbm and ndbm compatible interfaces to DB.
*
* The DBM routines, which call the NDBM routines.
*/
/*
* The single database used by the historic dbm entry points; set by
* dbminit() and NULL until a database has been opened.
*/
static DBM *__cur_db;
/* Reports on stderr that a dbm routine was called with no open database. */
static void __db_no_open __P((void));
/* Provide prototypes here since there are none in db.h. */
int dbm_error __P((DBM *));
int dbm_clearerr __P((DBM *));
int dbm_dirfno __P((DBM *));
int dbm_pagfno __P((DBM *));
int
dbminit(file)
char *file;
{
if (__cur_db != NULL)
(void)dbm_close(__cur_db);
if ((__cur_db =
dbm_open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR)) != NULL)
return (0);
if ((__cur_db = dbm_open(file, O_RDONLY, 0)) != NULL)
return (0);
return (-1);
}
datum
fetch(key)
datum key;
{
datum item;
if (__cur_db == NULL) {
__db_no_open();
item.dptr = 0;
return (item);
}
return (dbm_fetch(__cur_db, key));
}
datum
firstkey()
{
datum item;
if (__cur_db == NULL) {
__db_no_open();
item.dptr = 0;
return (item);
}
return (dbm_firstkey(__cur_db));
}
datum
nextkey(key)
datum key;
{
datum item;
if (__cur_db == NULL) {
__db_no_open();
item.dptr = 0;
return (item);
}
return (dbm_nextkey(__cur_db));
}
int
delete(key)
datum key;
{
int ret;
if (__cur_db == NULL) {
__db_no_open();
return (-1);
}
ret = dbm_delete(__cur_db, key);
if (ret == 0)
ret = (((DB *)__cur_db)->sync)((DB *)__cur_db, 0);
return (ret);
}
int
store(key, dat)
datum key, dat;
{
int ret;
if (__cur_db == NULL) {
__db_no_open();
return (-1);
}
ret = dbm_store(__cur_db, key, dat, DBM_REPLACE);
if (ret == 0)
ret = (((DB *)__cur_db)->sync)((DB *)__cur_db, 0);
return (ret);
}
/*
 * __db_no_open --
 *	Complain on stderr that a dbm routine was used with no open database.
 */
static void
__db_no_open()
{
	(void)fputs("dbm: no open database.\n", stderr);
}
/*
* This package provides dbm and ndbm compatible interfaces to DB.
*
* The NDBM routines, which call the DB routines.
*/
/*
 * dbm_open --
 *	Open (per oflags/mode) the hash database whose name is "file" with
 *	DBM_SUFFIX appended.
 *
 * Returns:
 *	*DBM on success
 *	NULL on failure, with errno set: ENAMETOOLONG if the name with its
 *	suffix does not fit in MAXPATHLEN, otherwise the value returned by
 *	db_open().
 */
DBM *
dbm_open(file, oflags, mode)
	const char *file;
	int oflags, mode;
{
	DB *dbp;
	DB_INFO dbinfo;
	char path[MAXPATHLEN];
	int len;

	memset(&dbinfo, 0, sizeof(dbinfo));
	dbinfo.db_pagesize = 4096;
	dbinfo.h_ffactor = 40;
	dbinfo.h_nelem = 1;

	/*
	 * Refuse names that do not fit rather than silently opening (and
	 * possibly creating) a file with a truncated name.  A negative
	 * result is also treated as failure; some older snprintf
	 * implementations report truncation that way.
	 */
	len = snprintf(path, sizeof(path), "%s%s", file, DBM_SUFFIX);
	if (len < 0 || (size_t)len >= sizeof(path)) {
		errno = ENAMETOOLONG;
		return (NULL);
	}

	if ((errno = db_open(path,
	    DB_HASH, __db_oflags(oflags), mode, NULL, &dbinfo, &dbp)) != 0)
		return (NULL);
	return ((DBM *)dbp);
}
/*
* Returns:
* Nothing.
*/
void
dbm_close(db)
DBM *db;
{
(void)db->close(db, 0);
}
/*
 * dbm_fetch --
 *	Look up "key" through the database's get method.
 *
 * Returns:
 *	A datum describing the stored data on success
 *	An empty datum (dptr NULL, dsize 0) on failure
 */
datum
dbm_fetch(db, key)
	DBM *db;
	datum key;
{
	DBT k, d;
	datum result;

	memset(&k, 0, sizeof(k));
	memset(&d, 0, sizeof(d));
	k.data = key.dptr;
	k.size = key.dsize;

	/* Any non-zero status from get is reported as "no data". */
	if (db->get((DB *)db, NULL, &k, &d, 0) != 0) {
		result.dptr = NULL;
		result.dsize = 0;
		return (result);
	}

	result.dptr = d.data;
	result.dsize = d.size;
	return (result);
}
/*
 * dbm_firstkey --
 *	Return the first key in the database (DB_FIRST), using the first
 *	cursor on the database's cursor queue and creating one if the queue
 *	is empty.
 *
 * Returns:
 *	A datum describing the key on success
 *	An empty datum (dptr NULL, dsize 0) on failure; errno is set if the
 *	cursor could not be created
 */
datum
dbm_firstkey(db)
	DBM *db;
{
	DBC *cursor;
	DBT k, d;
	datum first;

	cursor = TAILQ_FIRST(&db->curs_queue);
	if (cursor == NULL &&
	    (errno = db->cursor(db, NULL, &cursor)) != 0) {
		memset(&first, 0, sizeof(first));
		return (first);
	}

	memset(&k, 0, sizeof(k));
	memset(&d, 0, sizeof(d));

	if ((cursor->c_get)(cursor, &k, &d, DB_FIRST) == 0) {
		first.dptr = k.data;
		first.dsize = k.size;
	} else {
		first.dptr = NULL;
		first.dsize = 0;
	}
	return (first);
}
/*
 * dbm_nextkey --
 *	Advance (DB_NEXT) the first cursor on the database's cursor queue,
 *	creating one if the queue is empty, and return the key it lands on.
 *
 * Returns:
 *	A datum describing the key on success
 *	An empty datum (dptr NULL, dsize 0) on failure; errno is set if the
 *	cursor could not be created
 */
datum
dbm_nextkey(db)
	DBM *db;
{
	DBC *cursor;
	DBT k, d;
	datum next;

	cursor = TAILQ_FIRST(&db->curs_queue);
	if (cursor == NULL &&
	    (errno = db->cursor(db, NULL, &cursor)) != 0) {
		memset(&next, 0, sizeof(next));
		return (next);
	}

	memset(&k, 0, sizeof(k));
	memset(&d, 0, sizeof(d));

	if ((cursor->c_get)(cursor, &k, &d, DB_NEXT) == 0) {
		next.dptr = k.data;
		next.dsize = k.size;
	} else {
		next.dptr = NULL;
		next.dsize = 0;
	}
	return (next);
}
/*
 * dbm_delete --
 *	Remove "key" through the database's del method.
 *
 * Returns:
 *	 0 on success
 *	<0 on failure, with errno set: a positive result from del is stored
 *	   in errno and -1 is returned; a negative result is returned as is
 *	   with errno set to ENOENT
 */
int
dbm_delete(db, key)
	DBM *db;
	datum key;
{
	DBT k;
	int ret;

	memset(&k, 0, sizeof(k));
	k.size = key.dsize;
	k.data = key.dptr;

	ret = (((DB *)db)->del)((DB *)db, NULL, &k, 0);
	if (ret > 0) {
		errno = ret;
		return (-1);
	}
	if (ret < 0)
		errno = ENOENT;
	return (ret);
}
/*
* Returns:
* 0 on success
* <0 failure
* 1 if DBM_INSERT and entry exists
*/
int
dbm_store(db, key, data, flags)
DBM *db;
datum key, data;
int flags;
{
DBT _key, _data;
memset(&_key, 0, sizeof(DBT));
memset(&_data, 0, sizeof(DBT));
_key.data = key.dptr;
_key.size = key.dsize;
_data.data = data.dptr;
_data.size = data.dsize;
return (db->put((DB *)db,
NULL, &_key, &_data, (flags == DBM_INSERT) ? DB_NOOVERWRITE : 0));
}
/*
 * dbm_error --
 *	Return the local_errno value recorded in the hash table structure
 *	attached to the database.
 */
int
dbm_error(db)
	DBM *db;
{
	return (((HTAB *)db->internal)->local_errno);
}
/*
 * dbm_clearerr --
 *	Reset the local_errno value recorded in the hash table structure
 *	attached to the database.
 *
 * Returns:
 *	0 always
 */
int
dbm_clearerr(db)
	DBM *db;
{
	((HTAB *)db->internal)->local_errno = 0;
	return (0);
}
/*
 * XXX
 * We only have a single file descriptor that we can return, not two. Return
 * the same one for both files. Hopefully, the user is using it for locking
 * and picked one to use at random.
 *
 * dbm_dirfno --
 *	Return the descriptor reported by the database's fd method, or -1 if
 *	the method did not store one.
 */
int
dbm_dirfno(db)
	DBM *db;
{
	int fd;

	/*
	 * Start from -1 so that a failing fd method, whose result is
	 * ignored, cannot make us return an indeterminate value.
	 */
	fd = -1;
	(void)db->fd(db, &fd);
	return (fd);
}
/*
 * dbm_pagfno --
 *	Return the descriptor reported by the database's fd method (the same
 *	one dbm_dirfno() returns), or -1 if the method did not store one.
 */
int
dbm_pagfno(db)
	DBM *db;
{
	int fd;

	/*
	 * Start from -1 so that a failing fd method, whose result is
	 * ignored, cannot make us return an indeterminate value.
	 */
	fd = -1;
	(void)db->fd(db, &fd);
	return (fd);
}

1440
db2/hash/hash.c Normal file

File diff suppressed because it is too large Load Diff

211
db2/hash/hash.src Normal file
View File

@ -0,0 +1,211 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
* Margo Seltzer. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
* The President and Fellows of Harvard University. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)hash.src 10.1 (Sleepycat) 4/12/97
*/
#include "config.h"
/*
* This is the source file used to create the logging functions for the
* hash package. Each access method (or set of routines wishing to register
* record types with the transaction system) should have a file like this.
* Each type of log record and its parameters is defined. The basic
* format of a record definition is:
*
* BEGIN <RECORD_TYPE>
* ARG|DBT|POINTER <variable name> <variable type> <printf format>
* ...
* END
* ARG the argument is a simple parameter of the type specified.
* DBT the argument is a DBT (db.h) containing a length and pointer.
* POINTER the argument is a pointer to the data type specified; the entire
* type should be logged.
*
* There are a set of shell scripts of the form xxx.sh that generate c
* code and or h files to process these. (This is probably better done
* in a single PERL script, but for now, this works.)
*
* The DB recovery system requires the following three fields appear in
* every record, and will assign them to the per-record-type structures
* as well as making them the first parameters to the appropriate logging
* call.
* rectype: record-type, identifies the structure and log/read call
* txnid: transaction id, a DBT in this implementation
* prev: the last LSN for this transaction
*/
/*
* Use the argument of PREFIX as the prefix for all record types,
* routines, id numbers, etc.
*/
PREFIX ham
/*
* HASH-insdel: used for hash to insert/delete a pair of entries onto a master
* page. The pair might be regular key/data pairs or they might be the
* structures that refer to off page items, duplicates or offpage duplicates.
* Each field line below gives: kind, name, C type, printf format.
* opcode - PUTPAIR/DELPAIR + big masks
* fileid - identifies the file referenced
* pgno - page within file
* ndx - index on the page of the item being added (item index)
* pagelsn - lsn on the page before the update
* key - the key being inserted (presumably the key removed for DELPAIR --
* confirm)
* data - the data being inserted (presumably the data removed for DELPAIR --
* confirm)
*/
BEGIN insdel
ARG opcode u_int32_t lu
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
ARG ndx u_int32_t lu
POINTER pagelsn DB_LSN * lu
DBT key DBT s
DBT data DBT s
END
/*
* Used to add and remove overflow pages.
* prev_pgno is the previous page that is going to get modified to
* point to this one. If this is the first page in a chain
* then prev_pgno should be PGNO_INVALID.
* new_pgno is the page being allocated.
* next_pgno is the page that follows this one. On allocation,
* this should be PGNO_INVALID. For deletes, it may exist.
* pagelsn is the old lsn on the page.
* prevlsn and nextlsn are not described in the original notes; presumably
* they are the old lsns of the prev_pgno and next_pgno pages -- confirm.
* opcode presumably distinguishes adding a page from removing one -- confirm.
*/
BEGIN newpage
ARG opcode u_int32_t lu
ARG fileid u_int32_t lu
ARG prev_pgno db_pgno_t lu
POINTER prevlsn DB_LSN * lu
ARG new_pgno db_pgno_t lu
POINTER pagelsn DB_LSN * lu
ARG next_pgno db_pgno_t lu
POINTER nextlsn DB_LSN * lu
END
/*
* Splitting requires two types of log messages. The first
* logs the meta-data of the split. The second logs the
* data on the original page. To redo the split, we have
* to visit the new page (pages) and add the items back
* on the page if they are not yet there.
* For the meta-data split (the splitmeta record):
* fileid: identifies the file referenced
* bucket: max_bucket in table before split
* ovflpoint: overflow point before split.
* spares: spares[ovflpoint] before split.
* metalsn: presumably the lsn of the meta-data page before the split --
* confirm.
*/
BEGIN splitmeta
ARG fileid u_int32_t lu
ARG bucket u_int32_t lu
ARG ovflpoint u_int32_t lu
ARG spares u_int32_t lu
POINTER metalsn DB_LSN * lu
END
/*
* splitdata: the second of the two split log messages; it logs the data on
* a page involved in the split.
* fileid - identifies the file referenced
* opcode - NOTE(review): meaning not documented in this file -- confirm
* pgno - page within file
* pageimage - DBT holding the logged page contents
* pagelsn - presumably the lsn on the page before the update -- confirm
*/
BEGIN splitdata
ARG fileid u_int32_t lu
ARG opcode u_int32_t lu
ARG pgno db_pgno_t lu
DBT pageimage DBT s
POINTER pagelsn DB_LSN * lu
END
/*
* HASH-replace: is used for hash to handle partial puts that only
* affect a single master page.
* Fields appear in the record in the order of the lines below, which is
* not the order of this description.
* fileid - identifies the file referenced
* pgno - page within file
* ndx - index on the page of the item being modified (item index)
* pagelsn - lsn on the page before the update
* off - offset in the old item where the new item is going (signed).
* olditem - DBT that describes the part of the item being replaced.
* newitem - DBT of the new item.
* makedup - this was a replacement that made an item a duplicate.
*/
BEGIN replace
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
ARG ndx u_int32_t lu
POINTER pagelsn DB_LSN * lu
ARG off int32_t ld
DBT olditem DBT s
DBT newitem DBT s
ARG makedup u_int32_t lu
END
/*
* HASH-newpgno: is used to record getting/deleting a new page number.
* This doesn't require much data modification, just modifying the
* meta-data.
* pgno is the page being allocated/freed.
* free_pgno is the next_pgno on the free list.
* old_type was the type of a page being deallocated.
* old_pgno was the next page number before the deallocation. We use it
* to indicate whether we incremented the spares count or not
* during this allocation.
* new_type is not described in the original notes; presumably the type
* given to a page being allocated -- confirm.
* pagelsn and metalsn are presumably the lsns of the page and of the
* meta-data page before the update -- confirm.
*/
BEGIN newpgno
ARG opcode u_int32_t lu
ARG fileid u_int32_t lu
ARG pgno db_pgno_t lu
ARG free_pgno db_pgno_t lu
ARG old_type u_int32_t lu
ARG old_pgno db_pgno_t lu
ARG new_type u_int32_t lu
POINTER pagelsn DB_LSN * lu
POINTER metalsn DB_LSN * lu
END
/*
* ovfl: initialize a set of overflow pages.
* fileid - identifies the file referenced
* start_pgno - presumably the first page of the set -- confirm
* npages - presumably the number of pages in the set -- confirm
* free_pgno - NOTE(review): not documented here; presumably the free-list
* page number before the update -- confirm
* metalsn - presumably the lsn of the meta-data page before the update --
* confirm
*/
BEGIN ovfl
ARG fileid u_int32_t lu
ARG start_pgno db_pgno_t lu
ARG npages u_int32_t lu
ARG free_pgno db_pgno_t lu
POINTER metalsn DB_LSN * lu
END

1343
db2/hash/hash_auto.c Normal file

File diff suppressed because it is too large Load Diff

101
db2/hash/hash_conv.c Normal file
View File

@ -0,0 +1,101 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)hash_conv.c 10.3 (Sleepycat) 6/21/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "db_swap.h"
#include "hash.h"
/*
* __ham_pgin, __ham_pgout --
* Convert host-specific page layout to/from the host-independent
* format stored on disk.
*
* PUBLIC: int __ham_pgin __P((db_pgno_t, void *, DBT *));
* PUBLIC: int __ham_pgout __P((db_pgno_t, void *, DBT *));
*/
/*
 * Page-in conversion.  The page number stored in the page (byte-swapped
 * first when the file needs swapping) is compared with the page number
 * requested: a non-metadata page whose stored number does not match is
 * re-initialized as an empty P_HASH page and returned without conversion.
 * Otherwise the page is byte-swapped only if pginfo->needswap is set.
 */
int
__ham_pgin(pg, pp, cookie)
	db_pgno_t pg;
	void *pp;
	DBT *cookie;
{
	DB_PGINFO *info;
	u_int32_t stored_pgno;
	int is_meta;

	info = (DB_PGINFO *)cookie->data;
	is_meta = pg == PGNO_METADATA;

	stored_pgno = PGNO((PAGE *)pp);
	if (info->needswap) {
		M_32_SWAP(stored_pgno);
	}

	if (!is_meta && stored_pgno != pg) {
		P_INIT(pp, info->db_pagesize,
		    pg, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
		return (0);
	}

	if (info->needswap)
		return (is_meta ? __ham_mswap(pp) : __db_pgin(pg, pp));
	return (0);
}
/*
 * Page-out conversion: nothing to do unless pginfo->needswap is set, in
 * which case the metadata page is swapped by __ham_mswap() and every other
 * page by __db_pgout().
 */
int
__ham_pgout(pg, pp, cookie)
	db_pgno_t pg;
	void *pp;
	DBT *cookie;
{
	DB_PGINFO *info;

	info = (DB_PGINFO *)cookie->data;
	if (info->needswap) {
		if (pg == PGNO_METADATA)
			return (__ham_mswap(pp));
		return (__db_pgout(pg, pp));
	}
	return (0);
}
/*
 * __ham_mswap --
 *	Swap the bytes on the hash metadata page.
 *
 *	The page is treated as a run of 32-bit words, swapped in order:
 *	lsn part 1, lsn part 2, pgno, magic, version, pagesize, ovfl_point,
 *	last_freed, max_bucket, high_mask, low_mask, ffactor, nelem,
 *	h_charkey, flags, followed by NCACHED spares.
 *
 * PUBLIC: int __ham_mswap __P((void *));
 */
int
__ham_mswap(pg)
	void *pg;
{
	u_int8_t *p;
	int i, nfixed;

	/* Number of fixed 32-bit words listed above, before the spares. */
	nfixed = 15;

	p = (u_int8_t *)pg;
	for (i = 0; i < nfixed; ++i)
		SWAP32(p);		/* fixed header words */
	for (i = 0; i < NCACHED; ++i)
		SWAP32(p);		/* spares */
	return (0);
}

96
db2/hash/hash_debug.c Normal file
View File

@ -0,0 +1,96 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1995
* The President and Fellows of Harvard University. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Jeremy Rassen.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)hash_debug.c 10.2 (Sleepycat) 6/21/97";
#endif /* not lint */
#ifdef DEBUG
/*
* PACKAGE: hashing
*
* DESCRIPTION:
* Debug routines.
*
* ROUTINES:
*
* External
* __ham_dump_bucket
*/
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "hash.h"
/*
* __ham_dump_bucket --
*
* PUBLIC: #ifdef DEBUG
* PUBLIC: void __ham_dump_bucket __P((HTAB *, u_int32_t));
* PUBLIC: #endif
*/
/*
 * Walk the page chain of "bucket", starting at BUCKET_TO_PAGE() and
 * following next_pgno until PGNO_INVALID, printing each page with
 * __db_prpage().  The walk stops silently if a page cannot be fetched.
 */
void
__ham_dump_bucket(hashp, bucket)
	HTAB *hashp;
	u_int32_t bucket;
{
	PAGE *pagep;
	db_pgno_t cur;

	cur = BUCKET_TO_PAGE(hashp, bucket);
	while (cur != PGNO_INVALID) {
		if (memp_fget(hashp->dbp->mpf, &cur, 0, &pagep) != 0)
			break;
		(void)__db_prpage(pagep, 1);
		/* Read the link before the page is handed back. */
		cur = pagep->next_pgno;
		(void)memp_fput(hashp->dbp->mpf, pagep, 0);
	}
}
#endif /* DEBUG */

544
db2/hash/hash_dup.c Normal file
View File

@ -0,0 +1,544 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)hash_dup.c 10.5 (Sleepycat) 7/27/97";
#endif /* not lint */
/*
* PACKAGE: hashing
*
* DESCRIPTION:
* Manipulation of duplicates for the hash package.
*
* ROUTINES:
*
* External
* __ham_add_dup
* Internal
*/
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "db_swap.h"
#include "hash.h"
/* Forward declarations for the file-local helpers defined below. */
static int __ham_check_move __P((HTAB *, HASH_CURSOR *, int32_t));
static int __ham_dup_convert __P((HTAB *, HASH_CURSOR *));
static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *));
/*
* Called from hash_access to add a duplicate key. nval is the new
* value that we want to add. The flags correspond to the flag values
* to cursor_put indicating where to add the new element.
* There are 4 cases.
* Case 1: The existing duplicate set already resides on a separate page.
* We can use common code for this.
* Case 2: The element is small enough to just be added to the existing set.
* Case 3: The element is large enough to be a big item, so we're going to
* have to push the set onto a new page.
* Case 4: The element is large enough to push the duplicate set onto a
* separate page.
*
* PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int));
*/
int
__ham_add_dup(hashp, hcp, nval, flags)
HTAB *hashp;
HASH_CURSOR *hcp;
DBT *nval;
int flags;
{
DBT pval, tmp_val;
HKEYDATA *hk;
u_int32_t del_len, new_size;
int ret;
/*
* When the current on-page duplicate is being replaced, its length is
* subtracted from the growth of the set.
* NOTE(review): del_len is hcp->dup_len while the span replaced further
* down is DUP_SIZE(hcp->dup_len) -- confirm the difference is intended.
*/
if (flags == DB_CURRENT && hcp->dpgno == PGNO_INVALID)
del_len = hcp->dup_len;
else
del_len = 0;
/* Make sure the pair is on a page where the change can be made. */
if ((ret = __ham_check_move(hashp, hcp,
(int32_t)DUP_SIZE(nval->size) - (int32_t)del_len)) != 0)
return (ret);
/*
* Check if resulting duplicate set is going to need to go
* onto a separate duplicate page. If so, convert the
* duplicate set and add the new one. After conversion,
* hcp->dndx is the first free ndx or the index of the
* current pointer into the duplicate set.
*/
hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
new_size = DUP_SIZE(nval->size) - del_len + LEN_HKEYDATA(hcp->pagep,
hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
/*
* We convert to off-page duplicates if the item is a big item,
* the addition of the new item will make the set large, or
* if there isn't enough room on this page to add the next item.
*/
if (hk->type != H_OFFDUP &&
(hk->type == H_OFFPAGE || ISBIG(hashp, new_size) ||
DUP_SIZE(nval->size) - del_len > P_FREESPACE(hcp->pagep))) {
if ((ret = __ham_dup_convert(hashp, hcp)) != 0)
return (ret);
else
/* Re-read the item: the conversion rewrote it. */
hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
}
/* There are two separate cases here: on page and off page. */
if (hk->type != H_OFFDUP) {
if (hk->type != H_DUPLICATE) {
/*
* First duplicate for this key: rewrite the existing
* data item in duplicate format before adding to it.
*/
hk->type = H_DUPLICATE;
pval.flags = 0;
pval.data = hk->data;
pval.size = LEN_HDATA(hcp->pagep, hashp->hdr->pagesize,
hcp->bndx);
if ((ret = __ham_make_dup(&pval, &tmp_val, &hcp->big_data,
&hcp->big_datalen)) != 0 ||
(ret = __ham_replpair(hashp, hcp, &tmp_val, 1)) != 0)
return (ret);
}
/* Now make the new entry a duplicate. */
if ((ret = __ham_make_dup(nval,
&tmp_val, &hcp->big_data, &hcp->big_datalen)) != 0)
return (ret);
/*
* doff/dlen select the span of the existing data item that the
* new element replaces; dlen stays 0 (pure insertion) except for
* DB_CURRENT. For a flags value not listed below, doff keeps the
* 0 set by __ham_make_dup().
*/
tmp_val.dlen = 0;
switch (flags) { /* On page. */
case DB_KEYFIRST:
tmp_val.doff = 0;
break;
case DB_KEYLAST:
tmp_val.doff = LEN_HDATA(hcp->pagep,
hashp->hdr->pagesize, hcp->bndx);
break;
case DB_CURRENT:
tmp_val.doff = hcp->dup_off;
tmp_val.dlen = DUP_SIZE(hcp->dup_len);
break;
case DB_BEFORE:
tmp_val.doff = hcp->dup_off;
break;
case DB_AFTER:
tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len);
break;
}
/* Add the duplicate. */
ret = __ham_replpair(hashp, hcp, &tmp_val, 0);
if (ret == 0)
ret = __ham_dirty_page(hashp, hcp->pagep);
/* NOTE(review): cursors are updated even when ret != 0 -- confirm. */
__ham_c_update(hashp, hcp, hcp->pgno, tmp_val.size, 1, 1);
return (ret);
}
/* If we get here, then we're on duplicate pages. */
if (hcp->dpgno == PGNO_INVALID) {
/* Take the duplicate page number from the HOFFDUP item. */
memcpy(&hcp->dpgno,
(u_int8_t *)hk + SSZ(HOFFDUP, pgno), sizeof(db_pgno_t));
hcp->dndx = 0;
}
switch (flags) {
case DB_KEYFIRST:
/*
* The only way that we are already on a dup page is
* if we just converted the on-page representation.
* In that case, we've only got one page of duplicates.
*/
if (hcp->dpagep == NULL && (ret =
__db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
return (ret);
hcp->dndx = 0;
break;
case DB_KEYLAST:
if (hcp->dpagep == NULL && (ret =
__db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
return (ret);
/* Position after the last entry of the page obtained above. */
hcp->dpgno = PGNO(hcp->dpagep);
hcp->dndx = NUM_ENT(hcp->dpagep);
break;
case DB_CURRENT:
/* Remove the current element; the new one takes its index. */
if ((ret = __db_ditem(hashp->dbp, hcp->dpagep, hcp->dndx,
BKEYDATA_SIZE(GET_BKEYDATA(hcp->dpagep, hcp->dndx)->len)))
!= 0)
return (ret);
break;
case DB_BEFORE: /* The default behavior is correct. */
break;
case DB_AFTER:
hcp->dndx++;
break;
}
ret = __db_dput(hashp->dbp,
nval, &hcp->dpagep, &hcp->dndx, __ham_overflow_page);
hcp->pgno = PGNO(hcp->pagep);
/* NOTE(review): cursors are updated even when ret != 0 -- confirm. */
__ham_c_update(hashp, hcp, hcp->pgno, nval->size, 1, 1);
return (ret);
}
/*
* Convert an on-page set of duplicates to an offpage set of duplicates.
*
* A new P_DUPLICATE page is allocated and recorded in hcp->dpagep and
* hcp->dpgno, the data item of the cursor's current pair is copied onto it
* according to its type, and the item on the original page is then replaced
* through __ham_move_offpage() by a reference to the new page. If copying
* fails, the new page is released and hcp->dpagep is reset to NULL.
*
* Returns 0 on success, otherwise the error from the failing step.
*/
static int
__ham_dup_convert(hashp, hcp)
HTAB *hashp;
HASH_CURSOR *hcp;
{
BOVERFLOW bo;
DBT dbt;
HOFFPAGE ho;
db_indx_t dndx, len;
int ret;
u_int8_t *p, *pend;
/*
* Create a new page for the duplicates.
*/
if ((ret =
__ham_overflow_page(hashp->dbp, P_DUPLICATE, &hcp->dpagep)) != 0)
return (ret);
hcp->dpagep->type = P_DUPLICATE;
hcp->dpgno = PGNO(hcp->dpagep);
/*
* Now put the duplicates onto the new page.
*/
dbt.flags = 0;
switch (((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->type) {
case H_KEYDATA:
/* Simple case, one key on page; move it to dup page. */
dndx = 0;
dbt.size =
LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
dbt.data =
((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data;
ret = __db_pitem(hashp->dbp, hcp->dpagep,
(u_int32_t)dndx, BKEYDATA_SIZE(dbt.size), NULL, &dbt);
/* NOTE(review): the result of __ham_dirty_page is ignored here. */
if (ret == 0)
__ham_dirty_page(hashp, hcp->dpagep);
break;
case H_OFFPAGE:
/*
* Single off-page item: copy its reference (type, pgno, tlen)
* into a BOVERFLOW entry and put that on the dup page.
*/
dndx = 0;
memcpy(&ho,
P_ENTRY(hcp->pagep, H_DATAINDEX(hcp->bndx)), HOFFPAGE_SIZE);
bo.deleted = 0;
bo.type = ho.type;
bo.pgno = ho.pgno;
bo.tlen = ho.tlen;
dbt.size = BOVERFLOW_SIZE;
dbt.data = &bo;
ret = __db_pitem(hashp->dbp, hcp->dpagep,
(u_int32_t)dndx, dbt.size, &dbt, NULL);
/* NOTE(review): the result of __ham_dirty_page is ignored here. */
if (ret == 0)
__ham_dirty_page(hashp, hcp->dpagep);
break;
case H_DUPLICATE:
/*
* Walk the on-page set. Each element is a db_indx_t length,
* the data bytes, and a second db_indx_t which is skipped
* here; every element is added to the dup page in order.
*/
p = ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data;
pend = p +
LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
for (dndx = 0; p < pend; dndx++) {
memcpy(&len, p, sizeof(db_indx_t));
dbt.size = len;
p += sizeof(db_indx_t);
dbt.data = p;
p += len + sizeof(db_indx_t);
ret = __db_dput(hashp->dbp, &dbt,
&hcp->dpagep, &dndx, __ham_overflow_page);
if (ret != 0)
break;
}
break;
default:
/* Any other item type is reported through __db_pgfmt(). */
ret = __db_pgfmt(hashp->dbp, (u_long)hcp->pgno);
}
if (ret == 0) {
/*
* Now attach this to the source page in place of
* the old duplicate item.
*/
__ham_move_offpage(hashp, hcp->pagep,
(u_int32_t)H_DATAINDEX(hcp->bndx), hcp->dpgno);
/* Can probably just do a "put" here. */
ret = __ham_dirty_page(hashp, hcp->pagep);
} else {
/* Undo the allocation; the result of the delete is ignored. */
(void)__ham_del_page(hashp->dbp, hcp->dpagep);
hcp->dpagep = NULL;
}
return (ret);
}
/*
 * __ham_make_dup --
 *	Build in "dup" the duplicate-set encoding of "notdup": the item
 *	length, the item bytes, then the item length again, each length
 *	stored as a db_indx_t.  The buffer is set up by __ham_init_dbt()
 *	from *bufp / *sizep.  On return dup carries notdup's flags plus
 *	DB_DBT_PARTIAL, with doff 0 and dlen equal to the item size.
 *
 * NOTE(review): the length is narrowed to db_indx_t when sizing the buffer
 * while the copy uses the full notdup->size; presumably callers only pass
 * items that fit -- confirm.
 *
 * Returns 0 on success, or the error from __ham_init_dbt().
 */
static int
__ham_make_dup(notdup, dup, bufp, sizep)
	const DBT *notdup;
	DBT *dup;
	void **bufp;
	u_int32_t *sizep;
{
	db_indx_t len, total;
	u_int8_t *dst;
	int ret;

	len = (db_indx_t)notdup->size;
	total = DUP_SIZE(len);
	ret = __ham_init_dbt(dup, total, bufp, sizep);
	if (ret != 0)
		return (ret);

	dup->flags = notdup->flags;
	F_SET(dup, DB_DBT_PARTIAL);

	/* Layout: <len><data><len>. */
	dst = dup->data;
	memcpy(dst, &len, sizeof(db_indx_t));
	dst += sizeof(db_indx_t);
	memcpy(dst, notdup->data, notdup->size);
	dst += notdup->size;
	memcpy(dst, &len, sizeof(db_indx_t));

	dup->doff = 0;
	dup->dlen = notdup->size;
	return (0);
}
/*
* __ham_check_move --
* Decide whether the data item of the pair at the cursor (index
* hcp->bndx on hcp->pagep) can take add_len more bytes where it is.
* If it cannot, copy the key/data pair to a later page of the chain
* (adding an overflow page when no existing page has room), delete it
* from the current page, and repoint the cursor (pagep, pgno, bndx) at
* the copy, setting H_EXPAND on the cursor.
*
* Returns 0 when nothing had to move; otherwise the result of
* __ham_del_pair, or the first non-zero result from the page or
* logging helpers.
*/
static int
__ham_check_move(hashp, hcp, add_len)
HTAB *hashp;
HASH_CURSOR *hcp;
int32_t add_len;
{
DBT k, d;
DB_LSN new_lsn;
HKEYDATA *hk;
PAGE *next_pagep;
db_pgno_t next_pgno;
int rectype, ret;
u_int32_t new_datalen, old_len;
/*
* Check if we can do whatever we need to on this page. If not,
* then we'll have to move the current element to a new page.
*/
hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
/*
* If the item is already off page duplicates or an offpage item,
* then we know we can do whatever we need to do in-place
*/
if (hk->type == H_OFFDUP || hk->type == H_OFFPAGE)
return (0);
old_len =
LEN_HITEM(hcp->pagep, hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
new_datalen = old_len - HKEYDATA_SIZE(0) + add_len;
/*
* We need to add a new page under two conditions:
* 1. The addition makes the total data length cross the BIG
* threshold and the OFFDUP structure won't fit on this page.
* 2. The addition does not make the total data cross the
* threshold, but the new data won't fit on the page.
* If neither of these is true, then we can return.
*/
if (ISBIG(hashp, new_datalen) && (old_len > HOFFDUP_SIZE ||
HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->pagep)))
return (0);
if (!ISBIG(hashp, new_datalen) &&
add_len <= (int32_t)P_FREESPACE(hcp->pagep))
return (0);
/*
* If we get here, then we need to move the item to a new page.
* Check if there are more pages in the chain.
*/
/*
* From here on new_datalen is the space the data item will need on
* its destination page: an OFFDUP reference if it is big, otherwise
* the item itself.
*/
new_datalen = ISBIG(hashp, new_datalen) ?
HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen);
next_pagep = NULL;
/*
* Walk the chain; a page that turned out to be too full is released
* (clean) before the next one is fetched.
*/
for (next_pgno = NEXT_PGNO(hcp->pagep); next_pgno != PGNO_INVALID;
next_pgno = NEXT_PGNO(next_pagep)) {
if (next_pagep != NULL &&
(ret = __ham_put_page(hashp->dbp, next_pagep, 0)) != 0)
return (ret);
if ((ret = __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0)
return (ret);
if (P_FREESPACE(next_pagep) >= new_datalen)
break;
}
/*
* NOTE(review): the early returns below happen while next_pagep may
* still be held from the loop above, with no matching __ham_put_page
* here -- confirm whether a caller or helper releases it.
*/
/* No more pages, add one. */
if (next_pagep == NULL &&
(ret = __ham_add_ovflpage(hashp, hcp->pagep, 0, &next_pagep)) != 0)
return (ret);
/* Add new page at the end of the chain. */
if (P_FREESPACE(next_pagep) < new_datalen &&
(ret = __ham_add_ovflpage(hashp, next_pagep, 1, &next_pagep)) != 0)
return (ret);
/* Copy the item to the new page. */
if (DB_LOGGING(hashp->dbp)) {
/*
* Log the copy as a PUTPAIR on the destination page. An
* off-page key or data item is logged as its HOFFPAGE
* structure and flagged in rectype; otherwise the item bytes
* themselves are logged.
*/
rectype = PUTPAIR;
k.flags = 0;
d.flags = 0;
if (H_PAIRKEY(hcp->pagep, hcp->bndx)->type == H_OFFPAGE) {
rectype |= PAIR_KEYMASK;
k.data = H_PAIRKEY(hcp->pagep, hcp->bndx);
k.size = HOFFPAGE_SIZE;
} else {
k.data = H_PAIRKEY(hcp->pagep, hcp->bndx)->data;
k.size = LEN_HKEY(hcp->pagep,
hashp->hdr->pagesize, hcp->bndx);
}
if (hk->type == H_OFFPAGE) {
rectype |= PAIR_DATAMASK;
d.data = H_PAIRDATA(hcp->pagep, hcp->bndx);
d.size = HOFFPAGE_SIZE;
} else {
d.data = H_PAIRDATA(hcp->pagep, hcp->bndx)->data;
d.size = LEN_HDATA(hcp->pagep,
hashp->hdr->pagesize, hcp->bndx);
}
if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
(DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
hashp->dbp->log_fileid, PGNO(next_pagep),
(u_int32_t)H_NUMPAIRS(next_pagep), &LSN(next_pagep),
&k, &d)) != 0)
return (ret);
/* Move lsn onto page. */
LSN(next_pagep) = new_lsn; /* Structure assignment. */
}
__ham_copy_item(hashp, hcp->pagep, H_KEYINDEX(hcp->bndx), next_pagep);
__ham_copy_item(hashp, hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep);
/* Now delete the pair from the current page. */
ret = __ham_del_pair(hashp, hcp);
/* The old page is released dirty; the result of the put is ignored. */
(void)__ham_put_page(hashp->dbp, hcp->pagep, 1);
/*
* Point the cursor at the destination page. The index chosen is the
* last pair there; presumably __ham_copy_item appends -- confirm.
*/
hcp->pagep = next_pagep;
hcp->pgno = PGNO(hcp->pagep);
hcp->bndx = H_NUMPAIRS(hcp->pagep) - 1;
F_SET(hcp, H_EXPAND);
return (ret);
}
/*
* __ham_move_offpage --
* Replace an onpage set of duplicates with the OFFDUP structure that
* references the duplicate page.
*
* pagep is the page holding the item, ndx the index of the item being
* replaced, and pgno the page number stored in the new OFFDUP entry.
* When logging is enabled the replacement is logged first; the result
* of the log call is discarded.
*
* XXX This is really just a special case of __onpage_replace; we should
* probably combine them.
* PUBLIC: void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
*/
void
__ham_move_offpage(hashp, pagep, ndx, pgno)
HTAB *hashp;
PAGE *pagep;
u_int32_t ndx;
db_pgno_t pgno;
{
DBT new_dbt;
DBT old_dbt;
HOFFDUP od;
db_indx_t i;
int32_t shrink;
u_int8_t *src;
od.type = H_OFFDUP;
od.pgno = pgno;
if (DB_LOGGING(hashp->dbp)) {
/* Log the old item image and the OFFDUP entry replacing it. */
new_dbt.data = &od;
new_dbt.size = HOFFDUP_SIZE;
old_dbt.data = P_ENTRY(pagep, ndx);
old_dbt.size = LEN_HITEM(pagep, hashp->hdr->pagesize, ndx);
(void)__ham_replace_log(hashp->dbp->dbenv->lg_info,
(DB_TXN *)hashp->dbp->txn, &LSN(pagep), 0,
hashp->dbp->log_fileid, PGNO(pagep), (u_int32_t)ndx,
&LSN(pagep), -1, &old_dbt, &new_dbt, 0);
}
/*
* shrink is the difference between the current item length and the
* OFFDUP entry; it is signed, so it is negative if the current item
* is the smaller of the two.
*/
shrink =
LEN_HITEM(pagep, hashp->hdr->pagesize, ndx) - HOFFDUP_SIZE;
if (shrink != 0) {
/* Copy data. */
/*
* Slide the bytes lying between the page's HOFFSET and the
* start of item ndx by shrink bytes, then move HOFFSET by the
* same amount.
*/
src = (u_int8_t *)(pagep) + HOFFSET(pagep);
memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep));
HOFFSET(pagep) += shrink;
/* Update index table. */
/* Every offset from ndx to the last entry moves by shrink. */
for (i = ndx; i < NUM_ENT(pagep); i++)
pagep->inp[i] += shrink;
}
/* Now copy the offdup entry onto the page. */
memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE);
}

219
db2/hash/hash_func.c Normal file
View File

@ -0,0 +1,219 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993
* Margo Seltzer. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)hash_func.c 10.6 (Sleepycat) 7/26/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "hash.h"
/*
 * __ham_func2 --
 *	Phong Vo's linear congruential hash.
 *
 *	Each of the len bytes at key is folded into the running value with
 *	h = 0x63c63cd9 * h + 0x9c39c33d + byte, starting from h = 0, in
 *	u_int32_t arithmetic.  A zero-length key therefore hashes to 0.
 *
 * PUBLIC: u_int32_t __ham_func2 __P((const void *, u_int32_t));
 */
#define	dcharhash(h, c)	((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))

u_int32_t
__ham_func2(key, len)
	const void *key;
	u_int32_t len;
{
	const u_int8_t *e, *k;
	u_int32_t h;

	k = key;
	e = k + len;

	/*
	 * The loop condition alone bounds the walk: k advances one byte at
	 * a time and stops when it reaches e, so no extra end-of-buffer
	 * test is needed in the body.  Zero bytes inside the key are hashed
	 * like any other byte.
	 */
	for (h = 0; k != e; ++k)
		dcharhash(h, *k);
	return (h);
}
/*
 * __ham_func3 --
 *	Ozan Yigit's original sdbm hash.
 *
 *	Every byte of the key is folded into the running value with
 *	n = byte + 65599 * n, starting from n = 0, in u_int32_t
 *	arithmetic.  An empty key hashes to 0.
 *
 * PUBLIC: u_int32_t __ham_func3 __P((const void *, u_int32_t));
 */
u_int32_t
__ham_func3(key, len)
	const void *key;
	u_int32_t len;
{
	const u_int8_t *cur, *stop;
	u_int32_t hash;

	hash = 0;
	cur = key;
	for (stop = cur + len; cur < stop; ++cur)
		hash = *cur + 65599 * hash;
	return (hash);
}
/*
 * __ham_func4 --
 *	Chris Torek's hash function.  Although this function performs only
 *	slightly worse than __ham_func5 on strings, it performs horribly on
 *	numbers.
 *
 *	Every byte of the key is folded into the running value with
 *	h = 33 * h + byte, starting from h = 0, in u_int32_t arithmetic.
 *	An empty key hashes to 0.
 *
 * PUBLIC: u_int32_t __ham_func4 __P((const void *, u_int32_t));
 */
u_int32_t
__ham_func4(key, len)
	const void *key;
	u_int32_t len;
{
	const u_int8_t *cur, *stop;
	u_int32_t hash;

	hash = 0;
	cur = key;
	for (stop = cur + len; cur < stop; ++cur)
		hash = 33 * hash + *cur;
	return (hash);
}
/*
 * __ham_func5 --
 *	Fowler/Noll/Vo hash.
 *
 * The basis of the hash algorithm was taken from an idea sent by email to the
 * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and
 * Glenn Fowler (gsf@research.att.com).  Landon Curt Noll (chongo@toad.com)
 * later improved on their algorithm.
 *
 * The magic is in the interesting relationship between the special prime
 * 16777619 (2^24 + 403) and 2^32 and 2^8.
 *
 * This hash produces the fewest collisions of any function that we've seen so
 * far, and works well on both numbers and strings.
 *
 * For each byte of the key the running value, which starts at 0, is
 * multiplied by the prime and then XORed with the byte.
 *
 * PUBLIC: u_int32_t __ham_func5 __P((const void *, u_int32_t));
 */
u_int32_t
__ham_func5(key, len)
	const void *key;
	u_int32_t len;
{
	const u_int8_t *cur;
	u_int32_t hash, left;

	hash = 0;
	cur = key;
	for (left = len; left != 0; --left) {
		hash *= 16777619;
		hash ^= *cur++;
	}
	return (hash);
}

1775
db2/hash/hash_page.c Normal file

File diff suppressed because it is too large Load Diff

810
db2/hash/hash_rec.c Normal file
View File

@ -0,0 +1,810 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
* Margo Seltzer. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
* The President and Fellows of Harvard University. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)hash_rec.c 10.12 (Sleepycat) 8/22/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "shqueue.h"
#include "db_page.h"
#include "hash.h"
#include "btree.h"
#include "log.h"
#include "db_dispatch.h"
#include "common_ext.h"
/*
* __ham_insdel_recover --
* Recovery routine for the log record describing the insertion or
* deletion of a key/data pair on a single hash page. Depending on
* the opcode and on the direction of recovery, the pair is put back
* on the page or removed from it, and the page LSN is moved to match.
*
* redo is non-zero to reapply the logged operation and zero to undo
* it. On entry *lsnp is compared against the page LSN (and stored on
* the page when redoing); on a normal exit it is replaced by
* argp->prev_lsn. There is no explicit return here: REC_CLOSE
* presumably performs the cleanup and returns ret -- see its
* definition.
*
* PUBLIC: int __ham_insdel_recover
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
int
__ham_insdel_recover(logp, dbtp, lsnp, redo, info)
DB_LOG *logp;
DBT *dbtp;
DB_LSN *lsnp;
int redo;
void *info;
{
__ham_insdel_args *argp;
DB *mdbp, *file_dbp;
DB_MPOOLFILE *mpf;
HTAB *hashp;
PAGE *pagep;
u_int32_t op;
int cmp_n, cmp_p, getmeta, ret;
getmeta = 0;
hashp = NULL; /* XXX: shut the compiler up. */
REC_PRINT(__ham_insdel_print);
REC_INTRO(__ham_insdel_read);
ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
/*
* The "else if" below pairs with "if (!redo)": when redoing, a page
* that could not be fetched is created instead.
*/
if (ret != 0)
if (!redo) {
/*
* We are undoing and the page doesn't exist. That
* is equivalent to having a pagelsn of 0, so we
* would not have to undo anything. In this case,
* don't bother creating a page.
*/
*lsnp = argp->prev_lsn;
ret = 0;
goto out;
} else if ((ret = memp_fget(mpf, &argp->pgno,
DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
hashp = (HTAB *)file_dbp->internal;
GET_META(file_dbp, hashp);
getmeta = 1;
/*
* cmp_n == 0: the page LSN equals this record's LSN.
* cmp_p == 0: the page LSN equals the page LSN saved in the record.
*/
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
/*
* Two possible things going on:
* redo a delete/undo a put: delete the item from the page.
* redo a put/undo a delete: add the item to the page.
* If we are undoing a delete, then the information logged is the
* entire entry off the page, not just the data of a dbt. In
* this case, we want to copy it back onto the page verbatim.
* We do this by calling __putitem with the type H_OFFPAGE instead
* of H_KEYDATA.
*/
op = OPCODE_OF(argp->opcode);
if ((op == DELPAIR && cmp_n == 0 && !redo) ||
(op == PUTPAIR && cmp_p == 0 && redo)) {
/* Need to redo a PUT or undo a delete. */
__ham_putitem(pagep, &argp->key,
!redo || PAIR_ISKEYBIG(argp->opcode) ?
H_OFFPAGE : H_KEYDATA);
__ham_putitem(pagep, &argp->data,
!redo || PAIR_ISDATABIG(argp->opcode) ?
H_OFFPAGE : H_KEYDATA);
LSN(pagep) = redo ? *lsnp : argp->pagelsn;
if ((ret = __ham_put_page(file_dbp, pagep, 1)) != 0)
goto out;
} else if ((op == DELPAIR && cmp_p == 0 && redo)
|| (op == PUTPAIR && cmp_n == 0 && !redo)) {
/* Need to undo a put or redo a delete. */
__ham_dpair(file_dbp, pagep, argp->ndx);
LSN(pagep) = redo ? *lsnp : argp->pagelsn;
if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
goto out;
} else
/* Nothing to apply: release the page unmodified. */
if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
goto out;
/* Return the previous LSN. */
*lsnp = argp->prev_lsn;
out: if (getmeta)
RELEASE_META(file_dbp, hashp);
REC_CLOSE;
}
/*
* __ham_newpage_recover --
* This log message is used when we add/remove overflow pages. This
* message takes care of the pointer chains, not the data on the pages.
*
* Up to three pages are visited in turn: the page that was added or
* removed (new_pgno), its predecessor (prev_pgno) and its successor
* (next_pgno). Each has its own saved LSN in the record (pagelsn,
* prevlsn, nextlsn) and is only changed when its current LSN shows
* that the change applies.
*
* redo is non-zero to reapply the logged operation and zero to undo
* it. On a normal exit *lsnp is replaced by argp->prev_lsn. There is
* no explicit return here: REC_CLOSE presumably performs the cleanup
* and returns ret -- see its definition.
*
* PUBLIC: int __ham_newpage_recover
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
int
__ham_newpage_recover(logp, dbtp, lsnp, redo, info)
DB_LOG *logp;
DBT *dbtp;
DB_LSN *lsnp;
int redo;
void *info;
{
__ham_newpage_args *argp;
DB *mdbp, *file_dbp;
DB_MPOOLFILE *mpf;
HTAB *hashp;
PAGE *pagep;
int cmp_n, cmp_p, change, getmeta, ret;
getmeta = 0;
hashp = NULL; /* XXX: shut the compiler up. */
REC_PRINT(__ham_newpage_print);
REC_INTRO(__ham_newpage_read);
ret = memp_fget(mpf, &argp->new_pgno, 0, &pagep);
/*
* The "else if" below pairs with "if (!redo)": when redoing, a page
* that could not be fetched is created instead.
*/
if (ret != 0)
if (!redo) {
/*
* We are undoing and the page doesn't exist. That
* is equivalent to having a pagelsn of 0, so we
* would not have to undo anything. In this case,
* don't bother creating a page.
*/
ret = 0;
/* Skips GET_META, so getmeta is still 0 on this path. */
goto ppage;
} else if ((ret = memp_fget(mpf, &argp->new_pgno,
DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
hashp = (HTAB *)file_dbp->internal;
GET_META(file_dbp, hashp);
getmeta = 1;
/*
* There are potentially three pages we need to check: the one
* that we created/deleted, the one before it and the one after
* it.
*/
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
change = 0;
if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
(cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
/* Redo a create new page or undo a delete new page. */
P_INIT(pagep, file_dbp->pgsize, argp->new_pgno,
argp->prev_pgno, argp->next_pgno, 0, P_HASH);
change = 1;
} else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
(cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
/*
* Redo a delete or undo a create new page. All we
* really need to do is change the LSN.
*/
change = 1;
}
if (!change) {
if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
goto out;
} else {
LSN(pagep) = redo ? *lsnp : argp->pagelsn;
if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
goto out;
}
/* Now do the prev page. */
ppage: if (argp->prev_pgno != PGNO_INVALID) {
ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep);
if (ret != 0)
if (!redo) {
/*
* We are undoing and the page doesn't exist.
* That is equivalent to having a pagelsn of 0,
* so we would not have to undo anything. In
* this case, don't bother creating a page.
*/
ret = 0;
goto npage;
} else if ((ret =
memp_fget(mpf, &argp->prev_pgno,
DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->prevlsn);
change = 0;
/* Only the predecessor's forward link is adjusted. */
if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
(cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
/* Redo a create new page or undo a delete new page. */
pagep->next_pgno = argp->new_pgno;
change = 1;
} else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
(cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
/* Redo a delete or undo a create new page. */
pagep->next_pgno = argp->next_pgno;
change = 1;
}
if (!change) {
if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
goto out;
} else {
LSN(pagep) = redo ? *lsnp : argp->prevlsn;
if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
goto out;
}
}
/* Now time to do the next page */
npage: if (argp->next_pgno != PGNO_INVALID) {
ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep);
if (ret != 0)
if (!redo) {
/*
* We are undoing and the page doesn't exist.
* That is equivalent to having a pagelsn of 0,
* so we would not have to undo anything. In
* this case, don't bother creating a page.
*/
*lsnp = argp->prev_lsn;
ret = 0;
goto out;
} else if ((ret =
memp_fget(mpf, &argp->next_pgno,
DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
change = 0;
/* Only the successor's backward link is adjusted. */
if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
(cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
/* Redo a create new page or undo a delete new page. */
pagep->prev_pgno = argp->new_pgno;
change = 1;
} else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
(cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
/* Redo a delete or undo a create new page. */
pagep->prev_pgno = argp->prev_pgno;
change = 1;
}
if (!change) {
if ((ret =
__ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
goto out;
} else {
LSN(pagep) = redo ? *lsnp : argp->nextlsn;
if ((ret =
__ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
goto out;
}
}
*lsnp = argp->prev_lsn;
out: if (getmeta)
RELEASE_META(file_dbp, hashp);
REC_CLOSE;
}
/*
* __ham_replace_recover --
* This log message refers to partial puts that are local to a single
* page. You can think of them as special cases of the more general
* insdel log message.
*
* When redoing, the logged new item is written over the item at
* argp->ndx; when undoing, the logged old item is written back. If
* the record is flagged makedup, the item type is also switched
* between H_KEYDATA and H_DUPLICATE.
*
* redo is non-zero to reapply the logged operation and zero to undo
* it. On a normal exit *lsnp is replaced by argp->prev_lsn. There is
* no explicit return here: REC_CLOSE presumably performs the cleanup
* and returns ret -- see its definition.
*
* PUBLIC: int __ham_replace_recover
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
int
__ham_replace_recover(logp, dbtp, lsnp, redo, info)
DB_LOG *logp;
DBT *dbtp;
DB_LSN *lsnp;
int redo;
void *info;
{
__ham_replace_args *argp;
DB *mdbp, *file_dbp;
DB_MPOOLFILE *mpf;
DBT dbt;
HKEYDATA *hk;
HTAB *hashp;
PAGE *pagep;
int32_t grow;
int change, cmp_n, cmp_p, getmeta, ret;
getmeta = 0;
hashp = NULL; /* XXX: shut the compiler up. */
REC_PRINT(__ham_replace_print);
REC_INTRO(__ham_replace_read);
ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
/*
* The "else if" below pairs with "if (!redo)": when redoing, a page
* that could not be fetched is created instead.
*/
if (ret != 0)
if (!redo) {
/*
* We are undoing and the page doesn't exist. That
* is equivalent to having a pagelsn of 0, so we
* would not have to undo anything. In this case,
* don't bother creating a page.
*/
*lsnp = argp->prev_lsn;
ret = 0;
goto out;
} else if ((ret = memp_fget(mpf, &argp->pgno,
DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
hashp = (HTAB *)file_dbp->internal;
GET_META(file_dbp, hashp);
getmeta = 1;
/*
* cmp_n == 0: the page LSN equals this record's LSN.
* cmp_p == 0: the page LSN equals the page LSN saved in the record.
*/
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
/*
* dbt is only filled in on the two branches that set change, and is
* only read when change is set. grow is the signed size difference
* between the item being written and the item it replaces.
*/
if (cmp_p == 0 && redo) {
change = 1;
/* Reapply the change as specified. */
dbt.data = argp->newitem.data;
dbt.size = argp->newitem.size;
grow = argp->newitem.size - argp->olditem.size;
LSN(pagep) = *lsnp;
} else if (cmp_n == 0 && !redo) {
change = 1;
/* Undo the already applied change. */
dbt.data = argp->olditem.data;
dbt.size = argp->olditem.size;
grow = argp->olditem.size - argp->newitem.size;
LSN(pagep) = argp->pagelsn;
} else {
change = 0;
grow = 0;
}
if (change) {
__ham_onpage_replace(pagep,
file_dbp->pgsize, argp->ndx, argp->off, grow, &dbt);
if (argp->makedup) {
/* Redo marks the item a duplicate set; undo reverts it. */
hk = GET_HKEYDATA(pagep, argp->ndx);
if (redo)
hk->type = H_DUPLICATE;
else
hk->type = H_KEYDATA;
}
}
/* change doubles as the dirty flag for the put. */
if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0)
goto out;
*lsnp = argp->prev_lsn;
out: if (getmeta)
RELEASE_META(file_dbp, hashp);
REC_CLOSE;
}
/*
* __ham_newpgno_recover --
* This log message is used when allocating or deleting an overflow
* page. It takes care of modifying the meta data.
*
* The meta-data (last_freed and the spares count at ovfl_point) is
* brought in line first, governed by the meta-data LSN; then the
* allocated/freed page itself is fixed up, governed by its own LSN.
*
* redo is non-zero to reapply the logged operation and zero to undo
* it. On a normal exit *lsnp is replaced by argp->prev_lsn. There is
* no explicit return here: REC_CLOSE presumably performs the cleanup
* and returns ret -- see its definition.
*
* PUBLIC: int __ham_newpgno_recover
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
int
__ham_newpgno_recover(logp, dbtp, lsnp, redo, info)
DB_LOG *logp;
DBT *dbtp;
DB_LSN *lsnp;
int redo;
void *info;
{
__ham_newpgno_args *argp;
DB *mdbp, *file_dbp;
DB_MPOOLFILE *mpf;
HTAB *hashp;
PAGE *pagep;
int change, cmp_n, cmp_p, getmeta, ret;
getmeta = 0;
hashp = NULL; /* XXX: shut the compiler up. */
REC_PRINT(__ham_newpgno_print);
REC_INTRO(__ham_newpgno_read);
hashp = (HTAB *)file_dbp->internal;
GET_META(file_dbp, hashp);
getmeta = 1;
/*
* There are two phases to the recovery here. First we need
* to update the meta data; then we need to update the page.
* We'll do the meta-data first.
*/
cmp_n = log_compare(lsnp, &hashp->hdr->lsn);
cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn);
change = 0;
if ((cmp_p == 0 && redo && argp->opcode == ALLOCPGNO) ||
(cmp_n == 0 && !redo && argp->opcode == DELPGNO)) {
/* Need to redo an allocation or undo a deletion. */
hashp->hdr->last_freed = argp->free_pgno;
if (redo && argp->old_pgno != 0) /* Must be ALLOCPGNO */
hashp->hdr->spares[hashp->hdr->ovfl_point]++;
change = 1;
} else if (cmp_p == 0 && redo && argp->opcode == DELPGNO) {
/* Need to redo a deletion */
hashp->hdr->last_freed = argp->pgno;
change = 1;
} else if (cmp_n == 0 && !redo && argp->opcode == ALLOCPGNO) {
/* undo an allocation. */
if (argp->old_pgno == 0)
hashp->hdr->last_freed = argp->pgno;
else {
hashp->hdr->spares[hashp->hdr->ovfl_point]--;
hashp->hdr->last_freed = 0;
}
change = 1;
}
if (change) {
hashp->hdr->lsn = redo ? *lsnp : argp->metalsn;
F_SET(file_dbp, DB_HS_DIRTYMETA);
}
/* Now check the newly allocated/freed page. */
ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
/*
* The "else if" below pairs with "if (!redo)": when redoing, a page
* that could not be fetched is created instead.
*/
if (ret != 0)
if (!redo) {
/*
* We are undoing and the page doesn't exist. That
* is equivalent to having a pagelsn of 0, so we
* would not have to undo anything. In this case,
* don't bother creating a page.
*/
*lsnp = argp->prev_lsn;
ret = 0;
goto out;
} else if ((ret = memp_fget(mpf, &argp->pgno,
DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
change = 0;
if (cmp_p == 0 && redo && argp->opcode == ALLOCPGNO) {
/* Need to redo an allocation. */
P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
PGNO_INVALID, 0, argp->new_type);
change = 1;
} else if (cmp_n == 0 && !redo && argp->opcode == DELPGNO) {
/* Undoing a delete. */
P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
argp->old_pgno, 0, argp->old_type);
change = 1;
} else if ((cmp_p == 0 && redo && argp->opcode == DELPGNO) ||
(cmp_n == 0 && !redo && argp->opcode == ALLOCPGNO)) {
/* Need to redo a deletion or undo an allocation. */
/* The page is marked invalid and linked to free_pgno. */
NEXT_PGNO(pagep) = argp->free_pgno;
TYPE(pagep) = P_INVALID;
change = 1;
}
if (change)
LSN(pagep) = redo ? *lsnp : argp->pagelsn;
/* change doubles as the dirty flag for the put. */
if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0)
goto out;
*lsnp = argp->prev_lsn;
out: if (getmeta)
RELEASE_META(file_dbp, hashp);
REC_CLOSE;
}
/*
* __ham_splitmeta_recover --
* This is the meta-data part of the split. Records the new and old
* bucket numbers and the new/old mask information.
*
* Only the hash meta-data (max_bucket, ovfl_point, spares, high_mask,
* low_mask and the meta-data LSN) is modified here; no data page is
* fetched.
*
* redo is non-zero to reapply the logged operation and zero to undo
* it. On a normal exit *lsnp is replaced by argp->prev_lsn. There is
* no explicit return here: REC_CLOSE presumably performs the cleanup
* and returns ret -- see its definition.
*
* PUBLIC: int __ham_splitmeta_recover
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
int
__ham_splitmeta_recover(logp, dbtp, lsnp, redo, info)
DB_LOG *logp;
DBT *dbtp;
DB_LSN *lsnp;
int redo;
void *info;
{
__ham_splitmeta_args *argp;
DB *mdbp, *file_dbp;
DB_MPOOLFILE *mpf;
HTAB *hashp;
int change, cmp_n, cmp_p, getmeta, ret;
u_int32_t pow;
getmeta = 0;
hashp = NULL; /* XXX: shut the compiler up. */
REC_PRINT(__ham_splitmeta_print);
REC_INTRO(__ham_splitmeta_read);
hashp = (HTAB *)file_dbp->internal;
GET_META(file_dbp, hashp);
getmeta = 1;
/*
* Only the meta-data is involved in this record. Compare the
* meta-data LSN with the record to decide whether the split
* information has to be redone or undone.
*/
cmp_n = log_compare(lsnp, &hashp->hdr->lsn);
cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn);
change = 0;
if (cmp_p == 0 && redo) {
/* Need to redo the split information. */
hashp->hdr->max_bucket = argp->bucket + 1;
pow = __db_log2(hashp->hdr->max_bucket + 1);
/*
* Moving to a higher overflow point: the new slot starts from
* the spares count of the previous one.
*/
if (pow > hashp->hdr->ovfl_point) {
hashp->hdr->spares[pow] =
hashp->hdr->spares[hashp->hdr->ovfl_point];
hashp->hdr->ovfl_point = pow;
}
if (hashp->hdr->max_bucket > hashp->hdr->high_mask) {
hashp->hdr->low_mask = hashp->hdr->high_mask;
hashp->hdr->high_mask =
hashp->hdr->max_bucket | hashp->hdr->low_mask;
}
change = 1;
} else if (cmp_n == 0 && !redo) {
/* Need to undo the split information. */
hashp->hdr->max_bucket = argp->bucket;
hashp->hdr->ovfl_point = argp->ovflpoint;
hashp->hdr->spares[hashp->hdr->ovfl_point] = argp->spares;
/* Here pow is a bucket count (1 << log2), not an exponent. */
pow = 1 << __db_log2(hashp->hdr->max_bucket + 1);
hashp->hdr->high_mask = pow - 1;
hashp->hdr->low_mask = (pow >> 1) - 1;
change = 1;
}
if (change) {
hashp->hdr->lsn = redo ? *lsnp : argp->metalsn;
F_SET(file_dbp, DB_HS_DIRTYMETA);
}
*lsnp = argp->prev_lsn;
out: if (getmeta)
RELEASE_META(file_dbp, hashp);
REC_CLOSE;
}
/*
* __ham_splitdata_recover --
* Recovery routine for the page images logged during a split. A
* SPLITNEW image is copied onto the page when redoing; a SPLITOLD
* image is copied back when undoing, and undoing any other opcode
* reinitializes the page as an empty hash page.
*
* redo is non-zero to reapply the logged operation and zero to undo
* it. On a normal exit *lsnp is replaced by argp->prev_lsn. There is
* no explicit return here: REC_CLOSE presumably performs the cleanup
* and returns ret -- see its definition.
*
* PUBLIC: int __ham_splitdata_recover
* PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
*/
int
__ham_splitdata_recover(logp, dbtp, lsnp, redo, info)
DB_LOG *logp;
DBT *dbtp;
DB_LSN *lsnp;
int redo;
void *info;
{
__ham_splitdata_args *argp;
DB *mdbp, *file_dbp;
DB_MPOOLFILE *mpf;
HTAB *hashp;
PAGE *pagep;
int change, cmp_n, cmp_p, getmeta, ret;
getmeta = 0;
hashp = NULL; /* XXX: shut the compiler up. */
REC_PRINT(__ham_splitdata_print);
REC_INTRO(__ham_splitdata_read);
ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
/*
* The "else if" below pairs with "if (!redo)": when redoing, a page
* that could not be fetched is created instead.
*/
if (ret != 0)
if (!redo) {
/*
* We are undoing and the page doesn't exist. That
* is equivalent to having a pagelsn of 0, so we
* would not have to undo anything. In this case,
* don't bother creating a page.
*/
*lsnp = argp->prev_lsn;
ret = 0;
goto out;
} else if ((ret = memp_fget(mpf, &argp->pgno,
DB_MPOOL_CREATE, &pagep)) != 0)
goto out;
hashp = (HTAB *)file_dbp->internal;
GET_META(file_dbp, hashp);
getmeta = 1;
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
/*
* There are two types of log messages here, one for the old page
* and one for the new pages created. The original image in the
* SPLITOLD record is used for undo. The image in the SPLITNEW
* is used for redo. We should never have a case where there is
* a redo operation and the SPLITOLD record is on disk, but not
* the SPLITNEW record. Therefore, we only have work to do when
* redo NEW messages and undo OLD messages, but we have to update
* LSNs in both cases.
*/
change = 0;
if (cmp_p == 0 && redo) {
if (argp->opcode == SPLITNEW)
/* Need to redo the split described. */
memcpy(pagep, argp->pageimage.data,
argp->pageimage.size);
/* The LSN is set after the copy, so it overrides the image's. */
LSN(pagep) = *lsnp;
change = 1;
} else if (cmp_n == 0 && !redo) {
if (argp->opcode == SPLITOLD) {
/* Put back the old image. */
memcpy(pagep, argp->pageimage.data,
argp->pageimage.size);
} else
P_INIT(pagep, file_dbp->pgsize, argp->pgno,
PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
LSN(pagep) = argp->pagelsn;
change = 1;
}
/* change doubles as the dirty flag for the put. */
if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0)
goto out;
*lsnp = argp->prev_lsn;
out: if (getmeta)
RELEASE_META(file_dbp, hashp);
REC_CLOSE;
}
/*
 * __ham_ovfl_recover --
 *	This message is generated when we initialize a set of overflow pages.
 *
 *	The meta-data (last_freed, one spares slot and the meta-data LSN)
 *	is redone or undone first, governed by the meta-data LSN.  Then each
 *	page in the range start_pgno .. start_pgno + npages - 1 is visited:
 *	on redo, a page whose LSN is older than this record is initialized
 *	as an empty hash page linked to the next page of the range (the
 *	last one links to free_pgno); on undo, the page LSN is zeroed.
 *
 *	redo is non-zero to reapply the logged operation and zero to undo
 *	it.  On a normal exit *lsnp is replaced by argp->prev_lsn.  There is
 *	no explicit return here: REC_CLOSE presumably performs the cleanup
 *	and returns ret -- see its definition.
 *
 * PUBLIC: int __ham_ovfl_recover
 * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
 */
int
__ham_ovfl_recover(logp, dbtp, lsnp, redo, info)
	DB_LOG *logp;
	DBT *dbtp;
	DB_LSN *lsnp;
	int redo;
	void *info;
{
	__ham_ovfl_args *argp;
	DB *mdbp, *file_dbp;
	DB_MPOOLFILE *mpf;
	HTAB *hashp;
	PAGE *pagep;
	db_pgno_t max_pgno, pgno;
	int cmp_n, cmp_p, getmeta, ret;

	getmeta = 0;
	hashp = NULL;				/* XXX: shut the compiler up. */
	REC_PRINT(__ham_ovfl_print);
	REC_INTRO(__ham_ovfl_read);

	/*
	 * file_dbp must stay valid for the rest of the function: it is
	 * dereferenced by F_SET, P_INIT (pgsize), __ham_put_page and
	 * RELEASE_META below, so it must not be cleared here.
	 */
	hashp = (HTAB *)file_dbp->internal;
	GET_META(file_dbp, hashp);
	getmeta = 1;

	cmp_n = log_compare(lsnp, &hashp->hdr->lsn);
	cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn);

	if (cmp_p == 0 && redo) {
		/* Redo the allocation. */
		hashp->hdr->last_freed = argp->start_pgno;
		hashp->hdr->spares[argp->npages - 1] += argp->npages;
		hashp->hdr->lsn = *lsnp;
		F_SET(file_dbp, DB_HS_DIRTYMETA);
	} else if (cmp_n == 0 && !redo) {
		/* Undo the allocation. */
		hashp->hdr->last_freed = argp->free_pgno;
		hashp->hdr->spares[argp->npages - 1] -= argp->npages;
		hashp->hdr->lsn = argp->metalsn;
		F_SET(file_dbp, DB_HS_DIRTYMETA);
	}

	max_pgno = argp->start_pgno + argp->npages - 1;
	ret = 0;
	for (pgno = argp->start_pgno; pgno <= max_pgno; pgno++) {
		ret = memp_fget(mpf, &pgno, 0, &pagep);
		if (ret != 0) {
			if (!redo) {
				/*
				 * We are undoing and the page doesn't exist.
				 * There is nothing to undo and no page was
				 * fetched, so there is nothing to release
				 * either; clear the error and move on.
				 */
				ret = 0;
				continue;
			}
			/* Redoing: create the missing page. */
			if ((ret = memp_fget(mpf,
			    &pgno, DB_MPOOL_CREATE, &pagep)) != 0)
				goto out;
		}
		if (redo && log_compare((const DB_LSN *)lsnp,
		    (const DB_LSN *)&LSN(pagep)) > 0) {
			P_INIT(pagep, file_dbp->pgsize, pgno, PGNO_INVALID,
			    pgno == max_pgno ? argp->free_pgno : pgno + 1,
			    0, P_HASH);
			LSN(pagep) = *lsnp;
			ret = __ham_put_page(file_dbp, pagep, 1);
		} else if (!redo) {
			ZERO_LSN(pagep->lsn);
			ret = __ham_put_page(file_dbp, pagep, 1);
		} else
			ret = __ham_put_page(file_dbp, pagep, 0);
		if (ret)
			goto out;
	}

	*lsnp = argp->prev_lsn;
out:	if (getmeta)
		RELEASE_META(file_dbp, hashp);
	REC_CLOSE;
}

58
db2/hash/hash_stat.c Normal file
View File

@ -0,0 +1,58 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)hash_stat.c 10.6 (Sleepycat) 7/2/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "hash.h"
#include "common_ext.h"
/*
 * __ham_stat --
 *	Gather/print the hash statistics.
 *
 *	Writes the per-handle counters kept in the HTAB (accesses,
 *	collisions, expansions, overflows, big pages) to fp, then acquires
 *	the meta-data and writes nelem, max_bucket and every spares[] slot.
 *	Always returns 0 from this function body (GET_META may contain its
 *	own error return -- see its definition).
 *
 * PUBLIC: int __ham_stat __P((DB *, FILE *));
 */
int
__ham_stat(dbp, fp)
	DB *dbp;
	FILE *fp;
{
	HTAB *hashp;
	int i;

	hashp = (HTAB *)dbp->internal;

	/*
	 * Every value printed with %lu is cast to u_long, as is done for
	 * the meta-data fields below, so the argument type always matches
	 * the conversion whatever the declared width of the counter.
	 */
	fprintf(fp, "hash: accesses %lu collisions %lu\n",
	    (u_long)hashp->hash_accesses, (u_long)hashp->hash_collisions);
	fprintf(fp, "hash: expansions %lu\n",
	    (u_long)hashp->hash_expansions);
	fprintf(fp, "hash: overflows %lu\n",
	    (u_long)hashp->hash_overflows);
	fprintf(fp, "hash: big key/data pages %lu\n",
	    (u_long)hashp->hash_bigpages);

	SET_LOCKER(dbp, NULL);
	GET_META(dbp, hashp);
	fprintf(fp, "keys %lu maxp %lu\n",
	    (u_long)hashp->hdr->nelem, (u_long)hashp->hdr->max_bucket);

	for (i = 0; i < NCACHED; i++)
		fprintf(fp,
		    "spares[%d] = %lu\n", i, (u_long)hashp->hdr->spares[i]);

	RELEASE_META(dbp, hashp);
	return (0);
}

312
db2/include/btree.h Normal file
View File

@ -0,0 +1,312 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)btree.h 10.16 (Sleepycat) 8/24/97
*/
/*
 * Forward structure declarations: each structure tag is paired with its
 * typedef name here; the five structures are defined later in this header.
 */
struct __btree; typedef struct __btree BTREE;
struct __cursor; typedef struct __cursor CURSOR;
struct __epg; typedef struct __epg EPG;
struct __rcursor; typedef struct __rcursor RCURSOR;
struct __recno; typedef struct __recno RECNO;
/*
 * DEFMINKEYPAGE is a plain constant.  ISINTERNAL and ISLEAF classify a page
 * by comparing TYPE(p) against the btree and recno page-type codes; both
 * expand TYPE(p) twice, so p should be free of side effects.
 */
#undef DEFMINKEYPAGE /* Minimum keys per page */
#define DEFMINKEYPAGE (2)
#undef ISINTERNAL /* If an internal page. */
#define ISINTERNAL(p) (TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO)
#undef ISLEAF /* If a leaf page. */
#define ISLEAF(p) (TYPE(p) == P_LBTREE || TYPE(p) == P_LRECNO)
/*
 * Allocate and discard thread structures.
 *
 * GETHANDLE --
 *	Store in *dbpp the handle to work on and set its txn field to set_txn.
 *	If DB_AM_THREAD is set on dbp, the handle comes from __db_gethandle();
 *	a non-zero result is left in ret and returned from the ENCLOSING
 *	function.  Otherwise *dbpp is simply dbp.
 *
 *	Every argument is parenthesized and the final store is written as
 *	(*(dbpp))->txn, so dbpp may be any DB ** expression.  The previous
 *	spelling, *dbpp->txn, only parsed as intended when the caller wrote
 *	the argument as "&variable".
 */
#define GETHANDLE(dbp, set_txn, dbpp, ret) {				\
	if (F_ISSET((dbp), DB_AM_THREAD)) {				\
		if (((ret) =						\
		    __db_gethandle((dbp), __bam_bdup, (dbpp))) != 0)	\
			return (ret);					\
	} else								\
		*(dbpp) = (dbp);					\
	(*(dbpp))->txn = (set_txn);					\
}
/*
 * PUTHANDLE --
 *	Clear the handle's txn field and, if DB_AM_THREAD is set on it, pass
 *	the handle to __db_puthandle().  The argument is parenthesized so
 *	that expressions such as "&handle" expand correctly.
 */
#define PUTHANDLE(dbp) {						\
	(dbp)->txn = NULL;						\
	if (F_ISSET((dbp), DB_AM_THREAD))				\
		__db_puthandle((dbp));					\
}
/*
 * Lock release helpers.
 *
 * Inside a transaction, a lock that covers a data item taken from a page has
 * to be kept until the transaction finishes, while a lock that was only
 * needed to walk the tree can be given back at once.  Two macros keep the
 * cases apart so internal pages are not tied up longer than necessary:
 *
 *	__BT_LPUT   calls lock_put() whenever DB_AM_LOCKING is set on dbp.
 *	__BT_TLPUT  calls lock_put() only when DB_AM_LOCKING is set and
 *		    dbp->txn is NULL.
 *
 * Each macro evaluates to lock_put()'s result, or to 0 when no call is made.
 */
#define __BT_LPUT(dbp, lock)						\
	(!(F_ISSET((dbp), DB_AM_LOCKING)) ?				\
	    0 : lock_put((dbp)->dbenv->lk_info, lock))
#define __BT_TLPUT(dbp, lock)						\
	(!(F_ISSET((dbp), DB_AM_LOCKING)) || (dbp)->txn != NULL ?	\
	    0 : lock_put((dbp)->dbenv->lk_info, lock))
/*
* Flags to __bt_search() and __rec_search().
*
* Note, internal page searches must find the largest record less than key in
* the tree so that descents work. Leaf page searches must find the smallest
* record greater than key so that the returned index is the record's correct
* position for insertion.
*
* The flags parameter to the search routines describes three aspects of the
* search: the type of locking required (including if we're locking a pair of
* pages), the item to return in the presence of duplicates and whether or not
* to return deleted entries. To simplify both the mnemonic representation
* and the code that checks for various cases, we construct a set of bitmasks.
*/
/* Lock type. */
#define S_READ		0x0001		/* Read locks. */
#define S_WRITE		0x0002		/* Write locks. */

/* Search modifiers. */
#define S_APPEND	0x0040		/* Append to the tree. */
#define S_DELNO		0x0080		/* Don't return deleted items. */
#define S_DUPFIRST	0x0100		/* Return first duplicate. */
#define S_DUPLAST	0x0200		/* Return last duplicate. */
#define S_EXACT		0x0400		/* Exact items only. */
#define S_PARENT	0x0800		/* Lock page pair. */

/*
 * Ready-made combinations of the bits above.  S_INSERT and S_KEYLAST
 * expand to the same value.
 */
#define S_DELETE	(S_EXACT | S_DELNO | S_DUPFIRST | S_WRITE)
#define S_FIND		(S_DELNO | S_DUPFIRST | S_READ)
#define S_INSERT	(S_DUPLAST | S_WRITE)
#define S_KEYFIRST	(S_DUPFIRST | S_WRITE)
#define S_KEYLAST	(S_DUPLAST | S_WRITE)
#define S_WRPAIR	(S_PARENT | S_DUPLAST | S_WRITE)
/*
 * Flags to __bam_iitem().  The two values are distinct single bits.
 */
#define BI_NEWKEY 0x01 /* New key. */
#define BI_DELETED 0x02 /* Key/data pair only placeholder. */
/*
* Various routines pass around page references. A page reference can be a
* pointer to the page or a page number; for either, an indx can designate
* an item on the page.
*/
/* One page reference: a page, an index on that page, and the page's lock. */
struct __epg {
PAGE *page; /* The page. */
db_indx_t indx; /* The index on the page. */
DB_LOCK lock; /* The page's lock. */
};
/*
* Btree cursor.
*
* Arguments passed to __bam_ca_replace().
*/
/* Stage selector handed to __bam_ca_replace(); numbered from 0 upward. */
typedef enum {
	REPLACE_SETUP = 0,
	REPLACE_SUCCESS = 1,
	REPLACE_FAILED = 2
} ca_replace_arg;
/* Btree cursor state; the fields and the C_* flag bits are described inline. */
struct __cursor {
DBC *dbc; /* Enclosing DBC. */
PAGE *page; /* Cursor page. */
db_pgno_t pgno; /* Page. */
db_indx_t indx; /* Page item ref'd by the cursor. */
db_pgno_t dpgno; /* Duplicate page. */
/*
 * NOTE(review): dindx carries the same comment as indx; presumably it is
 * the item on the duplicate page (dpgno) -- confirm.
 */
db_indx_t dindx; /* Page item ref'd by the cursor. */
DB_LOCK lock; /* Cursor read lock. */
db_lockmode_t mode; /* Lock mode. */
/*
* If a cursor record is deleted, the key/data pair has to remain on
* the page so that subsequent inserts/deletes don't interrupt the
* cursor progression through the file. This results in interesting
* cases when "standard" operations, e.g., dbp->put() are done in the
* context of "deleted" cursors.
*
* C_DELETED -- The item referenced by the cursor has been "deleted"
* but not physically removed from the page.
* C_REPLACE -- The "deleted" item referenced by a cursor has been
* replaced by a dbp->put(), so the cursor is no longer
* responsible for physical removal from the page.
* C_REPLACE_SETUP --
* We are about to overwrite a "deleted" item, flag any
* cursors referencing it for transition to C_REPLACE
* state.
*/
#define C_DELETED 0x0001
#define C_REPLACE 0x0002
#define C_REPLACE_SETUP 0x0004
u_int32_t flags; /* Presumably a mask of the C_* bits above. */
};
/*
* Recno cursor.
*
* Arguments passed to __ram_ca().
*/
/* Operation selector handed to __ram_ca(); numbered from 0 upward. */
typedef enum {
	CA_DELETE = 0,
	CA_IAFTER = 1,
	CA_IBEFORE = 2
} ca_recno_arg;
/* Recno cursor state: the enclosing DBC, a record number and a flag word. */
struct __rcursor {
DBC *dbc; /* Enclosing DBC. */
db_recno_t recno; /* Current record number. */
/*
* Cursors referencing "deleted" records are positioned between
* two records, and so must be specially adjusted until they are
* moved.
*/
#define CR_DELETED 0x0001 /* Record deleted. */
u_int32_t flags; /* CR_DELETED is the only bit defined here. */
};
/*
* We maintain a stack of the pages that we're locking in the tree. Btree's
* (currently) only save two levels of the tree at a time, so the default
* stack is always large enough. Recno trees have to lock the entire tree to
* do inserts/deletes, however. Grow the stack as necessary.
*/
/*
 * BT_STK_CLR --
 *	Empty the stack by pointing the current entry back at the stack base
 *	(bt_sp).
 */
#undef	BT_STK_CLR
#define	BT_STK_CLR(t)							\
	((t)->bt_csp = (t)->bt_sp)

/*
 * BT_STK_ENTER --
 *	Fill in the current stack entry with a page, an index and a lock.
 *	When bt_csp has reached bt_esp, __bam_stkgrow() is called first; a
 *	non-zero result is left in ret and the entry is not written.  ret is
 *	0 on success.
 *
 *	The lock parameter is deliberately NOT named "lock": a macro parameter
 *	with that name also replaces the ->lock member name in the expansion,
 *	which only compiles when the caller's argument is literally spelled
 *	"lock".
 */
#undef	BT_STK_ENTER
#define	BT_STK_ENTER(t, pagep, page_indx, page_lock, ret) do {		\
	if (((ret) =							\
	    (t)->bt_csp == (t)->bt_esp ? __bam_stkgrow(t) : 0) == 0) {	\
		(t)->bt_csp->page = (pagep);				\
		(t)->bt_csp->indx = (page_indx);			\
		(t)->bt_csp->lock = (page_lock);			\
	}								\
} while (0)

/*
 * BT_STK_PUSH --
 *	BT_STK_ENTER, then advance bt_csp to the next entry.
 *
 *	NOTE(review): bt_csp is advanced even when ret is non-zero; callers
 *	presumably abandon the stack on error -- confirm.
 */
#undef	BT_STK_PUSH
#define	BT_STK_PUSH(t, pagep, page_indx, page_lock, ret) do {		\
	BT_STK_ENTER(t, pagep, page_indx, page_lock, ret);		\
	++(t)->bt_csp;							\
} while (0)

/*
 * BT_STK_POP --
 *	Step bt_csp back one entry and evaluate to it, or evaluate to NULL
 *	when the stack is empty.  "Empty" is tested against bt_sp, the same
 *	base BT_STK_CLR resets to, so the test stays correct when bt_sp no
 *	longer points at the built-in bt_stack array.
 */
#undef	BT_STK_POP
#define	BT_STK_POP(t)							\
	((t)->bt_csp == (t)->bt_sp ? NULL : --(t)->bt_csp)
/*
* The in-memory recno data structure.
*
* !!!
* These fields are ignored as far as multi-threading is concerned. There
* are no transaction semantics associated with backing files, nor is there
* any thread protection.
*/
#undef RECNO_OOB
#define RECNO_OOB 0 /* Illegal record number. */
/*
 * Recno state: record format settings, the backing source file, the bounds
 * of its mapped region and the input function.  The RECNO_* bits defined
 * inside the structure presumably belong in its flags word.
 */
struct __recno {
int re_delim; /* Variable-length delimiting byte. */
int re_pad; /* Fixed-length padding byte. */
u_int32_t re_len; /* Length for fixed-length records. */
char *re_source; /* Source file name. */
int re_fd; /* Source file descriptor */
db_recno_t re_last; /* Last record number read. */
void *re_cmap; /* Current point in mapped space. */
void *re_smap; /* Start of mapped space. */
void *re_emap; /* End of mapped space. */
size_t re_msize; /* Size of mapped region. */
/* Recno input function. */
int (*re_irec) __P((DB *, db_recno_t));
#define RECNO_EOF 0x0001 /* EOF on backing source file. */
#define RECNO_MODIFIED 0x0002 /* Tree was modified. */
u_int32_t flags;
};
/*
* The in-memory btree data structure.
*/
/*
 * Two groups of fields: per-thread state (including the page stack used by
 * the BT_STK_* macros) and settings copied from the original BTREE.
 */
struct __btree {
/*
* These fields are per-thread and are initialized when the BTREE structure
* is created.
*/
db_pgno_t bt_lpgno; /* Last insert location. */
DBT bt_rkey; /* Returned key. */
DBT bt_rdata; /* Returned data. */
EPG *bt_sp; /* Stack pointer. */
EPG *bt_csp; /* Current stack entry. */
EPG *bt_esp; /* End stack pointer. */
/* Built-in five-entry stack storage; presumably what bt_sp starts out as. */
EPG bt_stack[5];
RECNO *bt_recno; /* Private recno structure. */
DB_BTREE_LSTAT lstat; /* Btree local statistics. */
/*
* These fields are copied from the original BTREE structure and never
* change.
*/
db_indx_t bt_maxkey; /* Maximum keys per page. */
db_indx_t bt_minkey; /* Minimum keys per page. */
int (*bt_compare) /* Comparison function. */
__P((const DBT *, const DBT *));
size_t(*bt_prefix) /* Prefix function. */
__P((const DBT *, const DBT *));
db_indx_t bt_ovflsize; /* Maximum key/data on-page size. */
};
#include "btree_auto.h"
#include "btree_ext.h"
#include "db_am.h"
#include "common_ext.h"

108
db2/include/btree_auto.h Normal file
View File

@ -0,0 +1,108 @@
/* Do not edit: automatically built by dist/db_gen.sh. */
#ifndef bam_AUTO_H
#define bam_AUTO_H
/*
 * Seven record kinds.  Each DB_bam_* constant is an offset from DB_bam_BEGIN
 * (not defined in this file) and is followed by its argument structure.
 * All seven structures begin with the same four fields, in the same order:
 * type, txnid, prev_lsn, fileid.
 */
#define DB_bam_pg_alloc (DB_bam_BEGIN + 1)
typedef struct _bam_pg_alloc_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
DB_LSN meta_lsn;
DB_LSN page_lsn;
db_pgno_t pgno;
u_int32_t ptype;
db_pgno_t next;
} __bam_pg_alloc_args;
#define DB_bam_pg_free (DB_bam_BEGIN + 2)
typedef struct _bam_pg_free_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t pgno;
DB_LSN meta_lsn;
DBT header;
db_pgno_t next;
} __bam_pg_free_args;
#define DB_bam_split (DB_bam_BEGIN + 3)
typedef struct _bam_split_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t left;
DB_LSN llsn;
db_pgno_t right;
DB_LSN rlsn;
u_int32_t indx;
db_pgno_t npgno;
DB_LSN nlsn;
DBT pg;
} __bam_split_args;
#define DB_bam_rsplit (DB_bam_BEGIN + 4)
typedef struct _bam_rsplit_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t pgno;
DBT pgdbt;
DBT rootent;
DB_LSN rootlsn;
} __bam_rsplit_args;
#define DB_bam_adj (DB_bam_BEGIN + 5)
typedef struct _bam_adj_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t pgno;
DB_LSN lsn;
u_int32_t indx;
u_int32_t indx_copy;
u_int32_t is_insert;
} __bam_adj_args;
#define DB_bam_cadjust (DB_bam_BEGIN + 6)
typedef struct _bam_cadjust_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t pgno;
DB_LSN lsn;
u_int32_t indx;
int32_t adjust;
int32_t total;
} __bam_cadjust_args;
#define DB_bam_cdel (DB_bam_BEGIN + 7)
typedef struct _bam_cdel_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t pgno;
DB_LSN lsn;
u_int32_t indx;
} __bam_cdel_args;
#endif

121
db2/include/btree_ext.h Normal file
View File

@ -0,0 +1,121 @@
/* Do not edit: automatically built by dist/distrib. */
/*
 * Prototypes for the __bam_* and __ram_* functions.  Every parameter list is
 * wrapped in __P; the types used here (DB, DBT, EPG, CURSOR, BTREE, the
 * *_args structures, ...) are not declared in this file.
 */
int __bam_close __P((DB *));
int __bam_sync __P((DB *, int));
int __bam_cmp __P((DB *, const DBT *, EPG *));
int __bam_defcmp __P((const DBT *, const DBT *));
size_t __bam_defpfx __P((const DBT *, const DBT *));
int __bam_pgin __P((db_pgno_t, void *, DBT *));
int __bam_pgout __P((db_pgno_t, void *, DBT *));
int __bam_mswap __P((PAGE *));
int __bam_cursor __P((DB *, DB_TXN *, DBC **));
int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
int __bam_ovfl_chk __P((DB *, CURSOR *, u_int32_t, int));
int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *));
void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int));
void __bam_ca_dup __P((DB *,
db_pgno_t, u_int32_t, u_int32_t, db_pgno_t, u_int32_t));
void __bam_ca_move __P((DB *, BTREE *, db_pgno_t, db_pgno_t));
void __bam_ca_replace
__P((DB *, db_pgno_t, u_int32_t, ca_replace_arg));
void __bam_ca_split __P((DB *,
db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
int __bam_delete __P((DB *, DB_TXN *, DBT *, int));
int __ram_delete __P((DB *, DB_TXN *, DBT *, int));
int __bam_ditem __P((DB *, PAGE *, u_int32_t));
int __bam_adjindx __P((DB *, PAGE *, u_int32_t, u_int32_t, int));
int __bam_dpage __P((DB *, const DBT *));
int __bam_open __P((DB *, DBTYPE, DB_INFO *));
int __bam_bdup __P((DB *, DB *));
int __bam_new __P((DB *, u_int32_t, PAGE **));
int __bam_free __P((DB *, PAGE *));
int __bam_lget __P((DB *, int, db_pgno_t, db_lockmode_t, DB_LOCK *));
int __bam_lput __P((DB *, DB_LOCK));
int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int));
int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
int __bam_iitem __P((DB *,
PAGE **, db_indx_t *, DBT *, DBT *, int, int));
/* Seven *_recover functions, all sharing one signature. */
int __bam_pg_alloc_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_pg_free_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_split_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_rsplit_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_adj_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_cadjust_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_cdel_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ram_open __P((DB *, DBTYPE, DB_INFO *));
int __ram_cursor __P((DB *, DB_TXN *, DBC **));
int __ram_close __P((DB *));
void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
int __ram_getno __P((DB *, const DBT *, db_recno_t *, int));
int __ram_snapshot __P((DB *));
int __bam_rsearch __P((DB *, db_recno_t *, u_int, int, int *));
int __bam_adjust __P((DB *, BTREE *, int));
int __bam_nrecs __P((DB *, db_recno_t *));
db_recno_t __bam_total __P((PAGE *));
int __bam_search __P((DB *,
const DBT *, u_int, int, db_recno_t *, int *));
int __bam_stkrel __P((DB *));
int __bam_stkgrow __P((BTREE *));
int __bam_split __P((DB *, void *));
int __bam_broot __P((DB *, PAGE *, PAGE *, PAGE *));
int __ram_root __P((DB *, PAGE *, PAGE *, PAGE *));
int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t));
int __bam_stat __P((DB *, void *, void *(*)(size_t), int));
void __bam_add_mstat __P((DB_BTREE_LSTAT *, DB_BTREE_LSTAT *));
/*
 * A *_log, *_print and *_read triple for each of seven record names.  The
 * *_print functions have the same signature as the *_recover functions; each
 * *_read takes a void * and the address of a pointer to the matching *_args
 * structure.
 */
int __bam_pg_alloc_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, DB_LSN *, DB_LSN *, db_pgno_t,
u_int32_t, db_pgno_t));
int __bam_pg_alloc_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_pg_alloc_read __P((void *, __bam_pg_alloc_args **));
int __bam_pg_free_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, DB_LSN *, DBT *,
db_pgno_t));
int __bam_pg_free_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_pg_free_read __P((void *, __bam_pg_free_args **));
int __bam_split_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
DB_LSN *, u_int32_t, db_pgno_t, DB_LSN *,
DBT *));
int __bam_split_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_split_read __P((void *, __bam_split_args **));
int __bam_rsplit_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, DBT *, DBT *,
DB_LSN *));
int __bam_rsplit_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_rsplit_read __P((void *, __bam_rsplit_args **));
int __bam_adj_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
u_int32_t, u_int32_t));
int __bam_adj_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_adj_read __P((void *, __bam_adj_args **));
int __bam_cadjust_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
int32_t, int32_t));
int __bam_cadjust_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_cadjust_read __P((void *, __bam_cadjust_args **));
int __bam_cdel_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, DB_LSN *, u_int32_t));
int __bam_cdel_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __bam_cdel_read __P((void *, __bam_cdel_args **));
int __bam_init_print __P((DB_ENV *));
int __bam_init_recover __P((DB_ENV *));

65
db2/include/clib_ext.h Normal file
View File

@ -0,0 +1,65 @@
/* Do not edit: automatically built by dist/distrib. */
/*
 * Declarations for C library replacement functions.  Apart from the
 * err/warn family and get_long, each declaration is made only when the
 * corresponding HAVE_* macro is not defined.  The variadic functions get a
 * real prototype only under __STDC__.
 *
 * Each function is declared once; the memcpy/memmove pair used to be
 * repeated verbatim.
 */
#ifdef __STDC__
void err __P((int eval, const char *, ...));
#else
void err();
#endif
#ifdef __STDC__
void errx __P((int eval, const char *, ...));
#else
void errx();
#endif
#ifdef __STDC__
void warn __P((const char *, ...));
#else
void warn();
#endif
#ifdef __STDC__
void warnx __P((const char *, ...));
#else
void warnx();
#endif
#ifndef HAVE_GETCWD
char *getcwd __P((char *, size_t));
#endif
void get_long __P((char *, long, long, long *));
#ifndef HAVE_GETOPT
int getopt __P((int, char * const *, const char *));
#endif
#ifndef HAVE_MEMCMP
int memcmp __P((const void *, const void *, size_t));
#endif
#ifndef HAVE_MEMCPY
void *memcpy __P((void *, const void *, size_t));
#endif
#ifndef HAVE_MEMMOVE
void *memmove __P((void *, const void *, size_t));
#endif
#ifndef HAVE_RAISE
int raise __P((int));
#endif
#ifndef HAVE_SNPRINTF
#ifdef __STDC__
int snprintf __P((char *, size_t, const char *, ...));
#else
int snprintf();
#endif
#endif
#ifndef HAVE_STRDUP
char *strdup __P((const char *));
#endif
#ifndef HAVE_STRERROR
char *strerror __P((int));
#endif
#ifndef HAVE_STRSEP
char *strsep __P((char **, const char *));
#endif
#ifndef HAVE_VSNPRINTF
/* Declared without a parameter list; a prototype would need va_list. */
int vsnprintf();
#endif

41
db2/include/common_ext.h Normal file
View File

@ -0,0 +1,41 @@
/* Do not edit: automatically built by dist/distrib. */
/*
 * Prototypes for the shared __db_* helpers.  Every parameter list is wrapped
 * in __P; the types used here (DB, DB_ENV, DBT, APPNAME, FILE, ...) are not
 * declared in this file.  __db_err is variadic and therefore only gets a
 * real prototype under __STDC__.
 */
int __db_appname __P((DB_ENV *,
APPNAME, const char *, const char *, int *, char **));
int __db_apprec __P((DB_ENV *, int));
int __db_byteorder __P((DB_ENV *, int));
#ifdef __STDC__
void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...));
#else
void __db_err();
#endif
int __db_panic __P((DB *));
int __db_fchk __P((DB_ENV *, const char *, int, int));
int __db_fcchk __P((DB_ENV *, const char *, int, int, int));
int __db_cdelchk __P((const DB *, int, int, int));
int __db_cgetchk __P((const DB *, DBT *, DBT *, int, int));
int __db_cputchk __P((const DB *,
const DBT *, DBT *, int, int, int));
int __db_delchk __P((const DB *, int, int));
int __db_getchk __P((const DB *, const DBT *, DBT *, int));
int __db_putchk __P((const DB *, DBT *, const DBT *, int, int, int));
int __db_statchk __P((const DB *, int));
int __db_syncchk __P((const DB *, int));
int __db_ferr __P((const DB_ENV *, const char *, int));
u_int32_t __db_log2 __P((u_int32_t));
int __db_rcreate __P((DB_ENV *, APPNAME,
const char *, const char *, int, size_t, int *, void *));
int __db_ropen __P((DB_ENV *,
APPNAME, const char *, const char *, int, int *, void *));
int __db_rclose __P((DB_ENV *, int, void *));
int __db_runlink __P((DB_ENV *,
APPNAME, const char *, const char *, int));
int __db_rgrow __P((DB_ENV *, int, size_t));
int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *));
void __db_shalloc_init __P((void *, size_t));
int __db_shalloc __P((void *, size_t, size_t, void *));
void __db_shalloc_free __P((void *, void *));
size_t __db_shalloc_count __P((void *));
size_t __db_shsizeof __P((void *));
void __db_shalloc_dump __P((FILE *, void *));
int __db_tablesize __P((int));
void __db_hashinit __P((void *, int));

118
db2/include/cxx_int.h Normal file
View File

@ -0,0 +1,118 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1997
* Sleepycat Software. All rights reserved.
*
* @(#)cxx_int.h 10.4 (Sleepycat) 8/22/97
*/
#ifndef _CXX_INT_H_
#define _CXX_INT_H_
// private data structures known to the implementation only
#include <assert.h> // used by defines below
//
// Using FooImp classes will allow the implementation to change in the
// future without any modification to user code or even to header files
// that the user includes. FooImp * is just like void * except that it
// provides a little extra protection, since you cannot randomly assign
// any old pointer to a FooImp* as you can with void *. Currently, a
// pointer to such an opaque class is always just a pointer to the
// appropriate underlying implementation struct. These are converted
// back and forth using the various overloaded wrap()/unwrap() methods.
// This is essentially a use of the "Bridge" Design Pattern.
//
// WRAPPED_CLASS implements the appropriate wrap() and unwrap() methods
// for a wrapper class that has an underlying pointer representation.
//
// Expands to four things: an empty class _IMP_CLASS; unwrap() and
// unwrapConst(), which return 0 for a null wrapper pointer and otherwise
// cast val->imp() to _WRAPPED_TYPE; and wrap(), which casts a
// _WRAPPED_TYPE value to _IMP_CLASS *.
#define WRAPPED_CLASS(_WRAPPER_CLASS, _IMP_CLASS, _WRAPPED_TYPE) \
\
class _IMP_CLASS {}; \
\
inline _WRAPPED_TYPE unwrap(_WRAPPER_CLASS *val) \
{ \
if (!val) return 0; \
return (_WRAPPED_TYPE)(val->imp()); \
} \
\
inline const _WRAPPED_TYPE unwrapConst(const _WRAPPER_CLASS *val) \
{ \
if (!val) return 0; \
return (const _WRAPPED_TYPE)(val->imp()); \
} \
\
inline _IMP_CLASS *wrap(_WRAPPED_TYPE val) \
{ \
return (_IMP_CLASS*)val; \
}
// One expansion per (wrapper class, opaque Imp class, C handle pointer type).
WRAPPED_CLASS(DbLockTab, DbLockTabImp, DB_LOCKTAB*)
WRAPPED_CLASS(DbLog, DbLogImp, DB_LOG*)
WRAPPED_CLASS(DbMpool, DbMpoolImp, DB_MPOOL*)
WRAPPED_CLASS(DbMpoolFile, DbMpoolFileImp, DB_MPOOLFILE*)
WRAPPED_CLASS(Db, DbImp, DB*)
WRAPPED_CLASS(DbTxn, DbTxnImp, DB_TXN*)
WRAPPED_CLASS(DbTxnMgr, DbTxnMgrImp, DB_TXNMGR*)
// Macros that handle detected errors, in case we want to
// change the default behavior. runtime_error() throws an
// exception by default.
//
// Since it's unusual to throw an exception in a destructor,
// we have a separate macro. For now, we silently ignore such
// detected errors.
//
// Both macros forward to DbEnv::runtime_error(); DB_DESTRUCTOR_ERROR passes
// an extra third argument of 1 (presumably the "called from a destructor"
// indicator -- confirm against DbEnv::runtime_error).
#define DB_ERROR(caller, ecode) \
DbEnv::runtime_error(caller, ecode)
#define DB_DESTRUCTOR_ERROR(caller, ecode) \
DbEnv::runtime_error(caller, ecode, 1)
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// These defines are for tedious flag or field set/get access methods.
//
// Define setName() and getName() methods that twiddle
// the _flags field.
//
// Defines _class::set<_cxx_name>(int), which sets _flag_name in _flags for a
// non-zero argument and clears it otherwise, and _class::get<_cxx_name>(),
// which returns 1 when the bit is set and 0 when it is not.
#define DB_FLAG_METHODS(_class, _flags, _cxx_name, _flag_name) \
\
void _class::set##_cxx_name(int onOrOff) \
{ \
if (onOrOff) \
_flags |= _flag_name; \
else \
_flags &= ~(_flag_name); \
} \
\
int _class::get##_cxx_name() const \
{ \
return (_flags & _flag_name) ? 1 : 0; \
}
// Defines the const getter _class::get_<_cxx_name>(), which returns _field.
#define DB_RO_ACCESS(_class, _type, _cxx_name, _field) \
\
_type _class::get_##_cxx_name() const \
{ \
return _field; \
}
// Defines the setter _class::set_<_cxx_name>(_type), which stores its
// argument in _field.  The closing brace must not be followed by a line
// continuation, or the next #define is spliced into this macro's body.
#define DB_WO_ACCESS(_class, _type, _cxx_name, _field) \
\
void _class::set_##_cxx_name(_type value) \
{ \
	_field = value; \
}

// Defines both accessors by expanding DB_RO_ACCESS and then DB_WO_ACCESS.
#define DB_RW_ACCESS(_class, _type, _cxx_name, _field) \
DB_RO_ACCESS(_class, _type, _cxx_name, _field) \
DB_WO_ACCESS(_class, _type, _cxx_name, _field)
#endif /* !_CXX_INT_H_ */

796
db2/include/db.h.src Normal file
View File

@ -0,0 +1,796 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)db.h.src 10.67 (Sleepycat) 8/25/97
*/
#ifndef _DB_H_
#define _DB_H_
#ifndef __NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <stdio.h>
#endif
/*
* XXX
* MacOS: ensure that Metrowerks C makes enumeration types int sized.
*/
#ifdef __MWERKS__
#pragma enumsalwaysint on
#endif
/*
* XXX
* Handle function prototypes and the keyword "const". This steps on name
* space that DB doesn't control, but all of the other solutions are worse.
*/
#undef __P
#if defined(__STDC__) || defined(__cplusplus)
#define __P(protos) protos /* ANSI C prototypes */
#else
#define const
#define __P(protos) () /* K&R C preprocessor */
#endif
/*
* !!!
* DB needs basic information about specifically sized types. If they're
* not provided by the system, typedef them here.
*
* We protect them against multiple inclusion using __BIT_TYPES_DEFINED__,
* as does BIND and Kerberos, since we don't know for sure what #include
* files the user is using.
*
* !!!
* We also provide the standard u_int, u_long etc., if they're not provided
* by the system. This isn't completely necessary, but the example programs
* need them.
*/
#ifndef __BIT_TYPES_DEFINED__
#define __BIT_TYPES_DEFINED__
@u_int8_decl@
@int16_decl@
@u_int16_decl@
@int32_decl@
@u_int32_decl@
#endif
@u_char_decl@
@u_short_decl@
@u_int_decl@
@u_long_decl@
/*
 * Release identification (DB_VERSION_STRING repeats the 2.3.4 given by the
 * three numeric macros), followed by the basic page, index, record-number
 * and lock types and their limits.
 */
#define DB_VERSION_MAJOR 2
#define DB_VERSION_MINOR 3
#define DB_VERSION_PATCH 4
#define DB_VERSION_STRING "Sleepycat Software: DB 2.3.4: (8/20/97)"
typedef u_int32_t db_pgno_t; /* Page number type. */
typedef u_int16_t db_indx_t; /* Page offset type. */
#define DB_MAX_PAGES 0xffffffff /* >= # of pages in a file */
typedef u_int32_t db_recno_t; /* Record number type. */
typedef size_t DB_LOCK; /* Object returned by lock manager. */
#define DB_MAX_RECORDS 0xffffffff /* >= # of records in a tree */
#define DB_FILE_ID_LEN 20 /* DB file ID length. */
/* Forward structure declarations, so applications get type checking. */
struct __db; typedef struct __db DB;
/* DBM is a second name for the same structure, only under DB_DBM_HSEARCH. */
#ifdef DB_DBM_HSEARCH
typedef struct __db DBM;
#endif
struct __db_bt_stat; typedef struct __db_bt_stat DB_BTREE_STAT;
struct __db_dbt; typedef struct __db_dbt DBT;
struct __db_env; typedef struct __db_env DB_ENV;
struct __db_info; typedef struct __db_info DB_INFO;
struct __db_lockregion; typedef struct __db_lockregion DB_LOCKREGION;
struct __db_lockreq; typedef struct __db_lockreq DB_LOCKREQ;
struct __db_locktab; typedef struct __db_locktab DB_LOCKTAB;
struct __db_log; typedef struct __db_log DB_LOG;
struct __db_lsn; typedef struct __db_lsn DB_LSN;
struct __db_mpool; typedef struct __db_mpool DB_MPOOL;
struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
struct __db_mpool_stat; typedef struct __db_mpool_stat DB_MPOOL_STAT;
struct __db_mpoolfile; typedef struct __db_mpoolfile DB_MPOOLFILE;
struct __db_txn; typedef struct __db_txn DB_TXN;
struct __db_txn_active; typedef struct __db_txn_active DB_TXN_ACTIVE;
struct __db_txn_stat; typedef struct __db_txn_stat DB_TXN_STAT;
struct __db_txnmgr; typedef struct __db_txnmgr DB_TXNMGR;
struct __db_txnregion; typedef struct __db_txnregion DB_TXNREGION;
struct __dbc; typedef struct __dbc DBC;
/* Key/data structure -- a Data-Base Thang. */
/*
 * A key or data item: a pointer and length, three length/offset fields
 * marked RO, and a flag word.  The DB_DBT_* bits are defined inline and
 * presumably belong in that flags word.
 */
struct __db_dbt {
void *data; /* key/data */
u_int32_t size; /* key/data length */
u_int32_t ulen; /* RO: length of user buffer. */
u_int32_t dlen; /* RO: get/put record length. */
u_int32_t doff; /* RO: get/put record offset. */
#define DB_DBT_INTERNAL 0x01 /* Perform any mallocs using regular
malloc, not the user's malloc. */
#define DB_DBT_MALLOC 0x02 /* Return in allocated memory. */
#define DB_DBT_PARTIAL 0x04 /* Partial put/get. */
#define DB_DBT_USERMEM 0x08 /* Return in user's memory. */
u_int32_t flags;
};
/*
* Database configuration and initialization.
*/
/*
* Flags understood by both db_open(3) and db_appinit(3).
*/
/* Together these three bits make up the value 0x00007. */
#define DB_CREATE 0x00001 /* O_CREAT: create file as necessary. */
#define DB_NOMMAP 0x00002 /* Don't mmap underlying file. */
#define DB_THREAD 0x00004 /* Free-thread DB package handles. */
/*
* Flags understood by db_appinit(3).
*
* DB_APP_INIT and DB_MUTEXDEBUG are internal only, and not documented.
*/
/* 0x00007 COMMON MASK. */
/* Single-bit flags from 0x00008 up to 0x04000. */
#define DB_APP_INIT 0x00008 /* Appinit called, paths initialized. */
#define DB_INIT_LOCK 0x00010 /* Initialize locking. */
#define DB_INIT_LOG 0x00020 /* Initialize logging. */
#define DB_INIT_MPOOL 0x00040 /* Initialize mpool. */
#define DB_INIT_TXN 0x00080 /* Initialize transactions. */
#define DB_MPOOL_PRIVATE 0x00100 /* Mpool: private memory pool. */
#define DB_MUTEXDEBUG 0x00200 /* Do not get/set mutexes in regions. */
#define DB_RECOVER 0x00400 /* Run normal recovery. */
#define DB_RECOVER_FATAL 0x00800 /* Run catastrophic recovery. */
#define DB_TXN_NOSYNC 0x01000 /* Do not sync log on commit. */
#define DB_USE_ENVIRON 0x02000 /* Use the environment. */
#define DB_USE_ENVIRON_ROOT 0x04000 /* Use the environment if root. */
/*
 * Every DB_TXN_LOCK_* and DB_TXN_LOG_* value below is 0, so OR-ing one into
 * a flag word changes nothing and AND-ing against one is always zero.
 */
/* CURRENTLY UNUSED LOCK FLAGS. */
#define DB_TXN_LOCK_2PL 0x00000 /* Two-phase locking. */
#define DB_TXN_LOCK_OPTIMISTIC 0x00000 /* Optimistic locking. */
#define DB_TXN_LOCK_MASK 0x00000 /* Lock flags mask. */
/* CURRENTLY UNUSED LOG FLAGS. */
#define DB_TXN_LOG_REDO 0x00000 /* Redo-only logging. */
#define DB_TXN_LOG_UNDO 0x00000 /* Undo-only logging. */
#define DB_TXN_LOG_UNDOREDO 0x00000 /* Undo/redo write-ahead logging. */
#define DB_TXN_LOG_MASK 0x00000 /* Log flags mask. */
/*
* Flags understood by db_open(3).
*
* DB_EXCL and DB_TEMPORARY are internal only, and not documented.
* DB_SEQUENTIAL is currently internal, but likely to be exported some day.
*/
/* 0x00007 COMMON MASK. */
/* 0x07fff ALREADY USED. */
/* Single-bit flags from 0x08000 up to 0x80000. */
#define DB_EXCL 0x08000 /* O_EXCL: exclusive open. */
#define DB_RDONLY 0x10000 /* O_RDONLY: read-only. */
#define DB_SEQUENTIAL 0x20000 /* Indicate sequential access. */
#define DB_TEMPORARY 0x40000 /* Remove on last close. */
#define DB_TRUNCATE 0x80000 /* O_TRUNC: replace existing DB. */
/*
 * Deadlock detector modes; used in the DB_ENV structure to configure the
 * locking subsystem (presumably via its lk_detect field).  The values are
 * consecutive integers, not bit flags.
 */
#define DB_LOCK_NORUN 0x0
#define DB_LOCK_DEFAULT 0x1
#define DB_LOCK_OLDEST 0x2
#define DB_LOCK_RANDOM 0x3
#define DB_LOCK_YOUNGEST 0x4
/*
 * Environment description: error reporting, user paths, then one group of
 * fields each for locking, logging, the memory pool and transactions.  Each
 * *_info field is commented as the return from the matching *_open() call.
 */
struct __db_env {
int db_lorder; /* Byte order. */
/* Error message callback. */
void (*db_errcall) __P((const char *, char *));
FILE *db_errfile; /* Error message file stream. */
const char *db_errpfx; /* Error message prefix. */
int db_verbose; /* Generate debugging messages. */
/* User paths. */
char *db_home; /* Database home. */
char *db_log_dir; /* Database log file directory. */
char *db_tmp_dir; /* Database tmp file directory. */
char **db_data_dir; /* Database data file directories. */
int data_cnt; /* Database data file slots. */
int data_next; /* Next Database data file slot. */
/* Locking. */
DB_LOCKTAB *lk_info; /* Return from lock_open(). */
u_int8_t *lk_conflicts; /* Two dimensional conflict matrix. */
int lk_modes; /* Number of lock modes in table. */
unsigned int lk_max; /* Maximum number of locks. */
u_int32_t lk_detect; /* Deadlock detect on every conflict. */
int (*db_yield) __P((void)); /* Yield function for threads. */
/* Logging. */
DB_LOG *lg_info; /* Return from log_open(). */
u_int32_t lg_max; /* Maximum file size. */
/* Memory pool. */
DB_MPOOL *mp_info; /* Return from memp_open(). */
size_t mp_mmapsize; /* Maximum file size for mmap. */
size_t mp_size; /* Bytes in the mpool cache. */
/* Transactions. */
DB_TXNMGR *tx_info; /* Return from txn_open(). */
unsigned int tx_max; /* Maximum number of transactions. */
int (*tx_recover) /* Dispatch function for recovery. */
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
u_int32_t flags; /* Flags. */
};
/*******************************************************
* Access methods.
*******************************************************/
/* Access method selector; numbering starts at 1, so 0 is not a valid type. */
typedef enum {
	DB_BTREE = 1,		/* B+tree. */
	DB_HASH = 2,		/* Extended Linear Hashing. */
	DB_RECNO = 3,		/* Fixed and variable-length records. */
	DB_UNKNOWN = 4		/* Figure it out on open. */
} DBTYPE;
/*
 * Version and magic numbers for the btree, hash and log formats.  For btree
 * and log the current and oldest supported versions are equal; for hash the
 * oldest supported version (4) is one behind the current one (5).
 */
#define DB_BTREEVERSION 6 /* Current btree version. */
#define DB_BTREEOLDVER 6 /* Oldest btree version supported. */
#define DB_BTREEMAGIC 0x053162
#define DB_HASHVERSION 5 /* Current hash version. */
#define DB_HASHOLDVER 4 /* Oldest hash version supported. */
#define DB_HASHMAGIC 0x061561
#define DB_LOGVERSION 2 /* Current log version. */
#define DB_LOGOLDVER 2 /* Oldest log version supported. */
#define DB_LOGMAGIC 0x040988
/*
 * Database parameters, grouped by the access method they apply to
 * (general, btree, hash, recno).
 *
 * The DB_* macros defined inside the structure sit directly above the
 * flags member and are presumably the values it accepts (TODO confirm
 * against db_open()).  Being preprocessor macros they are visible
 * file-wide, not scoped to the structure.
 */
struct __db_info {
int db_lorder; /* Byte order. */
size_t db_cachesize; /* Underlying cache size. */
size_t db_pagesize; /* Underlying page size. */
/* Local heap allocation. */
void *(*db_malloc) __P((size_t));
/* Btree access method. */
int bt_maxkey; /* Maximum keys per page. */
int bt_minkey; /* Minimum keys per page. */
int (*bt_compare) /* Comparison function. */
__P((const DBT *, const DBT *));
size_t (*bt_prefix) /* Prefix function. */
__P((const DBT *, const DBT *));
/* Hash access method. */
unsigned int h_ffactor; /* Fill factor. */
unsigned int h_nelem; /* Number of elements. */
u_int32_t (*h_hash) /* Hash function. */
__P((const void *, u_int32_t));
/* Recno access method. */
int re_pad; /* Fixed-length padding byte. */
int re_delim; /* Variable-length delimiting byte. */
u_int32_t re_len; /* Length for fixed-length records. */
char *re_source; /* Source file name. */
/* Flag bits; each value below is a distinct single bit. */
#define DB_DELIMITER 0x0001 /* Recno: re_delim set. */
#define DB_DUP 0x0002 /* Btree, Hash: duplicate keys. */
#define DB_FIXEDLEN 0x0004 /* Recno: fixed-length records. */
#define DB_PAD 0x0008 /* Recno: re_pad set. */
#define DB_RECNUM 0x0010 /* Btree: record numbers. */
#define DB_RENUMBER 0x0020 /* Recno: renumber on insert/delete. */
#define DB_SNAPSHOT 0x0040 /* Recno: snapshot the input. */
u_int32_t flags;
};
/*
 * DB access method and cursor operation codes. These are implemented as
 * bit fields for future flexibility, but currently only a single one may
 * be specified to any function.
 *
 * Every code is a distinct single bit, from 0x000001 (DB_AFTER) up to
 * 0x040000 (DB_SET_RECNO).  The trailing comment on each line names the
 * function(s) the code is passed to.
 */
#define DB_AFTER 0x000001 /* c_put() */
#define DB_APPEND 0x000002 /* put() */
#define DB_BEFORE 0x000004 /* c_put() */
#define DB_CHECKPOINT 0x000008 /* log_put(), log_get() */
#define DB_CURRENT 0x000010 /* c_get(), c_put(), log_get() */
#define DB_FIRST 0x000020 /* c_get(), log_get() */
#define DB_FLUSH 0x000040 /* log_put() */
#define DB_GET_RECNO 0x000080 /* c_get() */
#define DB_KEYFIRST 0x000100 /* c_put() */
#define DB_KEYLAST 0x000200 /* c_put() */
#define DB_LAST 0x000400 /* c_get(), log_get() */
#define DB_NEXT 0x000800 /* c_get(), log_get() */
#define DB_NOOVERWRITE 0x001000 /* put() */
#define DB_NOSYNC 0x002000 /* close() */
#define DB_PREV 0x004000 /* c_get(), log_get() */
#define DB_RECORDCOUNT 0x008000 /* stat() */
#define DB_SET 0x010000 /* c_get(), log_get() */
#define DB_SET_RANGE 0x020000 /* c_get() */
#define DB_SET_RECNO 0x040000 /* get(), c_get() */
/*
 * DB (user visible) error return codes.
 *
 * All DB-specific codes are negative and parenthesized so they can be
 * used safely inside larger expressions.  -1 through -7 are user
 * visible; -8 through -11 are private to the library.
 */
#define DB_INCOMPLETE ( -1) /* Sync didn't finish. */
#define DB_KEYEMPTY ( -2) /* The key/data pair was deleted or
was never created by the user. */
#define DB_KEYEXIST ( -3) /* The key/data pair already exists. */
#define DB_LOCK_DEADLOCK ( -4) /* Locker killed to resolve deadlock. */
#define DB_LOCK_NOTGRANTED ( -5) /* Lock unavailable, no-wait set. */
#define DB_LOCK_NOTHELD ( -6) /* Lock not held by locker. */
#define DB_NOTFOUND ( -7) /* Key/data pair not found (EOF). */
/* DB (private) error return codes. */
#define DB_DELETED ( -8) /* Recovery file marked deleted. */
#define DB_NEEDSPLIT ( -9) /* Page needs to be split. */
#define DB_REGISTERED (-10) /* Entry was previously registered. */
#define DB_SWAPBYTES (-11) /* Database needs byte swapping. */
/*
 * Internal lock object: a page number paired with a fixed-size
 * (DB_FILE_ID_LEN bytes) file identifier.
 */
struct __db_ilock { /* Internal DB access method lock. */
db_pgno_t pgno; /* Page being locked. */
/* File id. */
u_int8_t fileid[DB_FILE_ID_LEN];
};
/*
 * DB access method description structure.
 *
 * Holds the handle's environment and memory pool references, the cursor
 * and handle queues (spelled out by hand instead of using the queue.h
 * macros), locking/logging state, and a table of per-handle method
 * pointers.  Every method takes the DB handle as its first argument.
 */
struct __db {
void *mutex; /* Synchronization for free threading */
DBTYPE type; /* DB access method. */
DB_ENV *dbenv; /* DB_ENV structure. */
DB_ENV *mp_dbenv; /* DB_ENV for local mpool creation. */
DB *master; /* Original DB created by db_open. */
void *internal; /* Access method private. */
DB_MPOOL *mp; /* The access method's mpool. */
DB_MPOOLFILE *mpf; /* The access method's mpool file. */
/*
* XXX
* Explicit representations of structures in queue.h.
*
* TAILQ_HEAD(curs_queue, __dbc);
*/
struct {
struct __dbc *tqh_first;
struct __dbc **tqh_last;
} curs_queue;
/*
* XXX
* Explicit representations of structures in queue.h.
*
* LIST_HEAD(handleq, __db);
* LIST_ENTRY(__db);
*/
struct {
struct __db *lh_first;
} handleq; /* List of handles for this DB. */
struct {
struct __db *le_next;
struct __db **le_prev;
} links; /* Links for the handle list. */
u_int32_t log_fileid; /* Logging file id. */
DB_TXN *txn; /* Current transaction. */
u_int32_t locker; /* Default process' locker id. */
DBT lock_dbt; /* DBT referencing lock. */
struct __db_ilock lock; /* Lock. */
size_t pgsize; /* Logical page size of file. */
/* Local heap allocation. */
void *(*db_malloc) __P((size_t));
/* Functions. */
int (*close) __P((DB *, int));
int (*cursor) __P((DB *, DB_TXN *, DBC **));
int (*del) __P((DB *, DB_TXN *, DBT *, int));
int (*fd) __P((DB *, int *));
int (*get) __P((DB *, DB_TXN *, DBT *, DBT *, int));
int (*put) __P((DB *, DB_TXN *, DBT *, DBT *, int));
int (*stat) __P((DB *, void *, void *(*)(size_t), int));
int (*sync) __P((DB *, int));
/*
 * Handle state bits.  Each value is a distinct single bit; the ones
 * marked "(internal)" name the DB_INFO flag they correspond to.
 */
#define DB_AM_DUP 0x000001 /* DB_DUP (internal). */
#define DB_AM_INMEM 0x000002 /* In-memory; no sync on close. */
#define DB_AM_LOCKING 0x000004 /* Perform locking. */
#define DB_AM_LOGGING 0x000008 /* Perform logging. */
#define DB_AM_MLOCAL 0x000010 /* Database memory pool is local. */
#define DB_AM_PGDEF 0x000020 /* Page size was defaulted. */
#define DB_AM_RDONLY 0x000040 /* Database is readonly. */
#define DB_AM_RECOVER 0x000080 /* In recovery (do not log or lock). */
#define DB_AM_SWAP 0x000100 /* Pages need to be byte-swapped. */
#define DB_AM_THREAD 0x000200 /* DB is multi-threaded. */
#define DB_BT_RECNUM 0x000400 /* DB_RECNUM (internal) */
#define DB_HS_DIRTYMETA 0x000800 /* Hash: Metadata page modified. */
#define DB_RE_DELIMITER 0x001000 /* DB_DELIMITER (internal). */
#define DB_RE_FIXEDLEN 0x002000 /* DB_FIXEDLEN (internal). */
#define DB_RE_PAD 0x004000 /* DB_PAD (internal). */
#define DB_RE_RENUMBER 0x008000 /* DB_RENUMBER (internal). */
#define DB_RE_SNAPSHOT 0x010000 /* DB_SNAPSHOT (internal). */
u_int32_t flags;
};
/*
 * Cursor description structure.
 *
 * Ties a cursor to its DB handle and transaction, links it into a tail
 * queue (hand-expanded TAILQ_ENTRY), and carries the four cursor
 * methods.  Each method takes the cursor itself as first argument.
 */
struct __dbc {
DB *dbp; /* Related DB access method. */
DB_TXN *txn; /* Associated transaction. */
/*
* XXX
* Explicit representations of structures in queue.h.
*
* TAILQ_ENTRY(__dbc);
*/
struct {
struct __dbc *tqe_next;
struct __dbc **tqe_prev;
} links;
void *internal; /* Access method private. */
int (*c_close) __P((DBC *));
int (*c_del) __P((DBC *, int));
int (*c_get) __P((DBC *, DBT *, DBT *, int));
int (*c_put) __P((DBC *, DBT *, DBT *, int));
};
/*
 * Btree/recno statistics structure.
 *
 * Every member is an unsigned 32-bit value.  The members fall into
 * configuration (flags, key limits, record length/pad, page size), page
 * counts, free-byte counts, and operation counters.
 */
struct __db_bt_stat {
u_int32_t bt_flags; /* Open flags. */
u_int32_t bt_maxkey; /* Maxkey value. */
u_int32_t bt_minkey; /* Minkey value. */
u_int32_t bt_re_len; /* Fixed-length record length. */
u_int32_t bt_re_pad; /* Fixed-length record pad. */
u_int32_t bt_pagesize; /* Page size. */
u_int32_t bt_levels; /* Tree levels. */
u_int32_t bt_nrecs; /* Number of records. */
u_int32_t bt_int_pg; /* Internal pages. */
u_int32_t bt_leaf_pg; /* Leaf pages. */
u_int32_t bt_dup_pg; /* Duplicate pages. */
u_int32_t bt_over_pg; /* Overflow pages. */
u_int32_t bt_free; /* Pages on the free list. */
u_int32_t bt_freed; /* Pages freed for reuse. */
u_int32_t bt_int_pgfree; /* Bytes free in internal pages. */
u_int32_t bt_leaf_pgfree; /* Bytes free in leaf pages. */
u_int32_t bt_dup_pgfree; /* Bytes free in duplicate pages. */
u_int32_t bt_over_pgfree; /* Bytes free in overflow pages. */
u_int32_t bt_pfxsaved; /* Bytes saved by prefix compression. */
u_int32_t bt_split; /* Total number of splits. */
u_int32_t bt_rootsplit; /* Root page splits. */
u_int32_t bt_fastsplit; /* Fast splits. */
u_int32_t bt_added; /* Items added. */
u_int32_t bt_deleted; /* Items deleted. */
u_int32_t bt_get; /* Items retrieved. */
u_int32_t bt_cache_hit; /* Hits in fast-insert code. */
u_int32_t bt_cache_miss; /* Misses in fast-insert code. */
};
/*
 * Top-level DB entry points.
 *
 * The declarations are wrapped in extern "C" when compiled as C++ so
 * that they keep C linkage.  The linkage block is closed with a plain
 * '}': a trailing ';' would be an empty declaration, which C++ compilers
 * before C++11 reject or warn about in pedantic mode.
 */
#if defined(__cplusplus)
extern "C" {
#endif
int db_appinit __P((const char *, char * const *, DB_ENV *, int));
int db_appexit __P((DB_ENV *));
int db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
char *db_version __P((int *, int *, int *));
#if defined(__cplusplus)
}
#endif
/*******************************************************
* Locking
*******************************************************/
/*
 * Lock region version and magic number, followed by per-function flag
 * values.  DB_LOCK_NOWAIT and DB_LOCK_CONFLICT share the value 0x01;
 * they are flags for different functions, as the comments below state.
 */
#define DB_LOCKVERSION 1
#define DB_LOCKMAGIC 0x090193
/* Flag values for lock_vec(). */
#define DB_LOCK_NOWAIT 0x01 /* Don't wait on unavailable lock. */
/* Flag values for lock_detect(). */
#define DB_LOCK_CONFLICT 0x01 /* Run on any conflict. */
/*
 * Request types: the operation carried in the op member of a
 * struct __db_lockreq.  No explicit values are given, so numbering
 * starts at 0 with DB_LOCK_DUMP.
 */
typedef enum {
DB_LOCK_DUMP, /* Display held locks. */
DB_LOCK_GET, /* Get the lock. */
DB_LOCK_PUT, /* Release the lock. */
DB_LOCK_PUT_ALL, /* Release locker's locks. */
DB_LOCK_PUT_OBJ /* Release locker's locks on obj. */
} db_lockop_t;
/*
 * Simple R/W lock modes and for multi-granularity intention locking.
 *
 * DB_LOCK_NG is pinned to 0; the others follow consecutively, giving six
 * modes in total (0..5).  The first three (NG, READ, WRITE) presumably
 * correspond to DB_LOCK_RW_N and all six to DB_LOCK_RIW_N -- TODO
 * confirm against the conflict matrices.
 */
typedef enum {
DB_LOCK_NG=0, /* Not granted. */
DB_LOCK_READ, /* Shared/read. */
DB_LOCK_WRITE, /* Exclusive/write. */
DB_LOCK_IREAD, /* Intent to share/read. */
DB_LOCK_IWRITE, /* Intent exclusive/write. */
DB_LOCK_IWR /* Intent to read and write. */
} db_lockmode_t;
/*
 * Lock request structure: one element of the request list taken by
 * lock_vec() (which is declared with DB_LOCKREQ * parameters).
 */
struct __db_lockreq {
db_lockop_t op; /* Operation. */
db_lockmode_t mode; /* Requested mode. */
u_int32_t locker; /* Locker identity. */
DBT *obj; /* Object being locked. */
DB_LOCK lock; /* Lock returned. */
};
/*
 * Commonly used conflict matrices.
 *
 * Both arrays are declared with incomplete type (no dimension), so
 * sizeof cannot be applied to them through this header; the *_N macros
 * carry the number of lock modes instead.  Presumably each matrix holds
 * N * N entries -- TODO confirm against the definitions.
 *
 * Standard Read/Write (or exclusive/shared) locks.
 */
#define DB_LOCK_RW_N 3
extern const u_int8_t db_rw_conflicts[];
/* Multi-granularity locking. */
#define DB_LOCK_RIW_N 6
extern const u_int8_t db_riw_conflicts[];
/*
 * Lock manager interface.
 *
 * Wrapped in extern "C" for C++ callers.  The linkage block is closed
 * with a plain '}' -- a trailing ';' is an empty declaration that
 * pre-C++11 compilers reject or warn about in pedantic mode.
 */
#if defined(__cplusplus)
extern "C" {
#endif
int lock_close __P((DB_LOCKTAB *));
int lock_detect __P((DB_LOCKTAB *, int, u_int32_t));
int lock_get __P((DB_LOCKTAB *,
    u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *));
int lock_id __P((DB_LOCKTAB *, u_int32_t *));
int lock_open __P((const char *, int, int, DB_ENV *, DB_LOCKTAB **));
int lock_put __P((DB_LOCKTAB *, DB_LOCK));
int lock_unlink __P((const char *, int, DB_ENV *));
int lock_vec __P((DB_LOCKTAB *,
    u_int32_t, int, DB_LOCKREQ *, int, DB_LOCKREQ **));
#if defined(__cplusplus)
}
#endif
/*******************************************************
* Logging.
*******************************************************/
/* Flag values for log_archive().  Each value is a distinct single bit. */
#define DB_ARCH_ABS 0x001 /* Absolute pathnames. */
#define DB_ARCH_DATA 0x002 /* Data files. */
#define DB_ARCH_LOG 0x004 /* Log files. */
/*
 * A DB_LSN has two parts, a fileid which identifies a specific file (the
 * "file" member), and an offset within that file (the "offset" member).
 * The fileid is an unsigned 4-byte quantity that uniquely identifies a
 * file within the log directory -- currently a simple counter inside the
 * log. The offset is also an unsigned 4-byte value. The log manager
 * guarantees the offset never needs more than 4 bytes by switching to a
 * new log file before the maximum length imposed by an unsigned 4-byte
 * offset is reached.
 */
struct __db_lsn {
u_int32_t file; /* File ID. */
u_int32_t offset; /* File offset. */
};
/*
 * Log manager interface.
 *
 * Wrapped in extern "C" for C++ callers.  The linkage block is closed
 * with a plain '}' -- a trailing ';' is an empty declaration that
 * pre-C++11 compilers reject or warn about in pedantic mode.
 */
#if defined(__cplusplus)
extern "C" {
#endif
int log_archive __P((DB_LOG *, char **[], int, void *(*)(size_t)));
int log_close __P((DB_LOG *));
int log_compare __P((const DB_LSN *, const DB_LSN *));
int log_file __P((DB_LOG *, const DB_LSN *, char *, size_t));
int log_flush __P((DB_LOG *, const DB_LSN *));
int log_get __P((DB_LOG *, DB_LSN *, DBT *, int));
int log_open __P((const char *, int, int, DB_ENV *, DB_LOG **));
int log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
int log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *));
int log_unlink __P((const char *, int, DB_ENV *));
int log_unregister __P((DB_LOG *, u_int32_t));
#if defined(__cplusplus)
}
#endif
/*******************************************************
* Mpool
*******************************************************/
/*
 * Memory pool flag values.  The two groups below reuse the same numeric
 * values (0x001, 0x002, 0x004), so a flag from one group must not be
 * passed to a function that expects the other.
 */
/* Flag values for memp_fget(). */
#define DB_MPOOL_CREATE 0x001 /* Create a page. */
#define DB_MPOOL_LAST 0x002 /* Return the last page. */
#define DB_MPOOL_NEW 0x004 /* Create a new page. */
/* Flag values for memp_fput(), memp_fset(). */
#define DB_MPOOL_CLEAN 0x001 /* Clear modified bit. */
#define DB_MPOOL_DIRTY 0x002 /* Page is modified. */
#define DB_MPOOL_DISCARD 0x004 /* Don't cache the page. */
/*
 * Mpool statistics structure.
 *
 * The cache size is a size_t; every other member is an unsigned long
 * counter.  memp_stat() is declared with a DB_MPOOL_STAT ** parameter
 * for this data.
 */
struct __db_mpool_stat {
size_t st_cachesize; /* Cache size. */
unsigned long st_cache_hit; /* Pages found in the cache. */
unsigned long st_cache_miss; /* Pages not found in the cache. */
unsigned long st_map; /* Pages from mapped files. */
unsigned long st_page_create; /* Pages created in the cache. */
unsigned long st_page_in; /* Pages read in. */
unsigned long st_page_out; /* Pages written out. */
unsigned long st_ro_evict; /* Read-only pages evicted. */
unsigned long st_rw_evict; /* Read-write pages evicted. */
unsigned long st_hash_buckets; /* Number of hash buckets. */
unsigned long st_hash_searches; /* Total hash chain searches. */
unsigned long st_hash_longest; /* Longest hash chain searched. */
unsigned long st_hash_examined; /* Total hash entries searched. */
};
/*
 * Mpool file statistics structure.
 *
 * The counter members repeat the names and types of the page counters
 * in struct __db_mpool_stat (cache hit/miss, map, page create/in/out),
 * with the file's name and page size added.
 */
struct __db_mpool_fstat {
char *file_name; /* File name. */
size_t st_pagesize; /* Page size. */
unsigned long st_cache_hit; /* Pages found in the cache. */
unsigned long st_cache_miss; /* Pages not found in the cache. */
unsigned long st_map; /* Pages from mapped files. */
unsigned long st_page_create; /* Pages created in the cache. */
unsigned long st_page_in; /* Pages read in. */
unsigned long st_page_out; /* Pages written out. */
};
/*
 * Memory pool interface.
 *
 * Wrapped in extern "C" for C++ callers.  The linkage block is closed
 * with a plain '}' -- a trailing ';' is an empty declaration that
 * pre-C++11 compilers reject or warn about in pedantic mode.
 */
#if defined(__cplusplus)
extern "C" {
#endif
int memp_close __P((DB_MPOOL *));
int memp_fclose __P((DB_MPOOLFILE *));
int memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, unsigned long, void *));
int memp_fopen __P((DB_MPOOL *, const char *,
    int, int, int, size_t, int, DBT *, u_int8_t *, DB_MPOOLFILE **));
int memp_fput __P((DB_MPOOLFILE *, void *, unsigned long));
int memp_fset __P((DB_MPOOLFILE *, void *, unsigned long));
int memp_fsync __P((DB_MPOOLFILE *));
int memp_open __P((const char *, int, int, DB_ENV *, DB_MPOOL **));
int memp_register __P((DB_MPOOL *, int,
    int (*)(db_pgno_t, void *, DBT *),
    int (*)(db_pgno_t, void *, DBT *)));
int memp_stat __P((DB_MPOOL *,
    DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t)));
int memp_sync __P((DB_MPOOL *, DB_LSN *));
int memp_unlink __P((const char *, int, DB_ENV *));
#if defined(__cplusplus)
}
#endif
/*******************************************************
* Transactions.
*******************************************************/
/* Transaction region version and magic number. */
#define DB_TXNVERSION 1
#define DB_TXNMAGIC 0x041593
/*
 * Operations values to the tx_recover() function.  These are small
 * consecutive integers (1..5), not bit flags, so they cannot be or'ed
 * together.
 */
#define DB_TXN_BACKWARD_ROLL 1 /* Read the log backwards. */
#define DB_TXN_FORWARD_ROLL 2 /* Read the log forwards. */
#define DB_TXN_OPENFILES 3 /* Read for open files. */
#define DB_TXN_REDO 4 /* Redo the operation. */
#define DB_TXN_UNDO 5 /* Undo the operation. */
/* Internal transaction status values. */
/*
 * Description of one active transaction: its ID and the LSN of its
 * begin record.  struct __db_txn_stat points at an array of these
 * through its st_txnarray member.
 */
struct __db_txn_active {
u_int32_t txnid; /* Transaction ID */
DB_LSN lsn; /* Lsn of the begin record */
};
/*
 * Transaction statistics structure.  txn_stat() is declared with a
 * DB_TXN_STAT ** parameter for this data.  st_txnarray is presumably
 * st_nactive elements long -- TODO confirm against txn_stat().
 */
struct __db_txn_stat {
DB_LSN st_last_ckp; /* lsn of the last checkpoint */
DB_LSN st_pending_ckp; /* last checkpoint did not finish */
time_t st_time_ckp; /* time of last checkpoint */
u_int32_t st_last_txnid; /* last transaction id given out */
u_int32_t st_maxtxns; /* maximum number of active txns */
u_int32_t st_naborts; /* number of aborted transactions */
u_int32_t st_nbegins; /* number of begun transactions */
u_int32_t st_ncommits; /* number of committed transactions */
u_int32_t st_nactive; /* number of active transactions */
DB_TXN_ACTIVE *st_txnarray; /* array of active transactions */
};
/*
 * Transaction manager interface.
 *
 * Wrapped in extern "C" for C++ callers.  The linkage block is closed
 * with a plain '}' -- a trailing ';' is an empty declaration that
 * pre-C++11 compilers reject or warn about in pedantic mode.
 */
#if defined(__cplusplus)
extern "C" {
#endif
int txn_abort __P((DB_TXN *));
int txn_begin __P((DB_TXNMGR *, DB_TXN *, DB_TXN **));
int txn_checkpoint __P((const DB_TXNMGR *, long, long));
int txn_commit __P((DB_TXN *));
int txn_close __P((DB_TXNMGR *));
u_int32_t txn_id __P((DB_TXN *));
int txn_open __P((const char *, int, int, DB_ENV *, DB_TXNMGR **));
int txn_prepare __P((DB_TXN *));
int txn_stat __P((DB_TXNMGR *, DB_TXN_STAT **, void *(*)(size_t)));
int txn_unlink __P((const char *, int, DB_ENV *));
#if defined(__cplusplus)
}
#endif
#ifdef DB_DBM_HSEARCH
/*******************************************************
* Dbm/Ndbm historic interfaces.
*******************************************************/
/* Values for the final int argument of dbm_store(). */
#define DBM_INSERT 0 /* Flags to dbm_store(). */
#define DBM_REPLACE 1
/*
* The db(3) support for ndbm(3) always appends this suffix to the
* file name to avoid overwriting the user's original database.
*/
#define DBM_SUFFIX ".db"
/*
 * Key or content item for the dbm/ndbm interfaces: a pointer plus a
 * length.  Note that the length is a plain int, not a size_t.
 */
typedef struct {
char *dptr;
int dsize;
} datum;
/*
 * Historic dbm(3) and ndbm(3) entry points.
 *
 * delete() is hidden from C++ because "delete" is a C++ keyword.
 * The extern "C" block is closed with a plain '}' -- a trailing ';' is
 * an empty declaration that pre-C++11 compilers reject or warn about in
 * pedantic mode.
 */
#if defined(__cplusplus)
extern "C" {
#endif
int dbminit __P((char *));
#if !defined(__cplusplus)
int delete __P((datum));
#endif
datum fetch __P((datum));
datum firstkey __P((void));
datum nextkey __P((datum));
int store __P((datum, datum));
/*
 * !!!
 * Don't prototype:
 *
 * dbm_clearerr(DBM *db);
 * dbm_dirfno(DBM *db);
 * dbm_error(DBM *db);
 * dbm_pagfno(DBM *db);
 * dbm_rdonly(DBM *db);
 *
 * they weren't documented and were historically implemented as #define's.
 */
void dbm_close __P((DBM *));
int dbm_delete __P((DBM *, datum));
datum dbm_fetch __P((DBM *, datum));
datum dbm_firstkey __P((DBM *));
long dbm_forder __P((DBM *, datum));
datum dbm_nextkey __P((DBM *));
DBM *dbm_open __P((const char *, int, int));
int dbm_store __P((DBM *, datum, datum, int));
#if defined(__cplusplus)
}
#endif
/*******************************************************
* Hsearch historic interface.
*******************************************************/
/*
 * Second argument of hsearch().  FIND is 0 and ENTER is 1.
 * NOTE(review): FIND/ENTER/ACTION are unprefixed global names; they
 * would presumably clash with a system <search.h> included in the same
 * translation unit -- confirm before combining the two.
 */
typedef enum {
FIND, ENTER
} ACTION;
/*
 * Item passed to and returned (by pointer) from hsearch(): a key string
 * and an untyped data pointer.
 */
typedef struct entry {
char *key;
void *data;
} ENTRY;
/*
 * Historic hsearch(3) entry points.
 *
 * Wrapped in extern "C" for C++ callers.  The linkage block is closed
 * with a plain '}' -- a trailing ';' is an empty declaration that
 * pre-C++11 compilers reject or warn about in pedantic mode.
 */
#if defined(__cplusplus)
extern "C" {
#endif
int hcreate __P((unsigned int));
void hdestroy __P((void));
ENTRY *hsearch __P((ENTRY, ACTION));
#if defined(__cplusplus)
}
#endif
#endif /* DB_DBM_HSEARCH */
/*
* XXX
* MacOS: Reset Metrowerks C enum sizes.
*
* Only active when __MWERKS__ is defined.  This "reset" presumably
* undoes an earlier "#pragma enumsalwaysint" near the top of the header
* (not visible from here) -- TODO confirm the two stay paired.
*/
#ifdef __MWERKS__
#pragma enumsalwaysint reset
#endif
#endif /* !_DB_H_ */

170
db2/include/db_185.h.src Normal file
View File

@ -0,0 +1,170 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)db_185.h.src 8.3 (Sleepycat) 7/27/97
*/
#ifndef _DB_185_H_
#define _DB_185_H_
#include <sys/types.h>
#include <limits.h>
/*
* XXX
* Handle function prototypes and the keyword "const". This steps on name
* space that DB doesn't control, but all of the other solutions are worse.
*
* Any previous definition of __P is discarded first.  With an ANSI C or
* C++ compiler __P(protos) expands to the prototype list itself; on a
* K&R compiler it expands to an empty "()" and "const" is defined away
* for every file that includes this header.
*/
#undef __P
#if defined(__STDC__) || defined(__cplusplus)
#define __P(protos) protos /* ANSI C prototypes */
#else
#define const
#define __P(protos) () /* K&R C preprocessor */
#endif
/*
 * Three-way return convention used by this interface: negative for
 * error, zero for success, positive for a "special" outcome.
 */
#define RET_ERROR -1 /* Return values. */
#define RET_SUCCESS 0
#define RET_SPECIAL 1
/*
 * Fixed-width integer types, declared only if no other header has
 * already claimed __BIT_TYPES_DEFINED__.  The @...@ tokens are
 * placeholders in this .src template; presumably the build substitutes
 * the platform's typedefs for them -- TODO confirm in the build scripts.
 */
#ifndef __BIT_TYPES_DEFINED__
#define __BIT_TYPES_DEFINED__
@u_int8_decl@
@int16_decl@
@u_int16_decl@
@int32_decl@
@u_int32_decl@
#endif
/*
 * Page number, in-page index and record number types, each paired with
 * the largest value its type can hold (32-bit, 16-bit and 32-bit
 * unsigned respectively).
 */
#define MAX_PAGE_NUMBER 0xffffffff /* >= # of pages in a file */
typedef u_int32_t pgno_t;
#define MAX_PAGE_OFFSET 65535 /* >= # of bytes in a page */
typedef u_int16_t indx_t;
#define MAX_REC_NUMBER 0xffffffff /* >= # of records in a tree */
typedef u_int32_t recno_t;
/*
 * Key/data structure -- a Data-Base Thang.
 *
 * This is the two-member 1.85-style DBT: an untyped pointer and a byte
 * count, nothing else.
 */
typedef struct {
void *data; /* data */
size_t size; /* data length */
} DBT;
/*
 * Routine flags.
 *
 * These are consecutive integers (1..11), not bit masks, so only one
 * can be expressed at a time.  Value 2 is reserved as __R_UNUSED.  The
 * trailing comments name the routines (and access methods) each flag
 * applies to.
 */
#define R_CURSOR 1 /* del, put, seq */
#define __R_UNUSED 2 /* UNUSED */
#define R_FIRST 3 /* seq */
#define R_IAFTER 4 /* put (RECNO) */
#define R_IBEFORE 5 /* put (RECNO) */
#define R_LAST 6 /* seq (BTREE, RECNO) */
#define R_NEXT 7 /* seq */
#define R_NOOVERWRITE 8 /* put */
#define R_PREV 9 /* seq (BTREE, RECNO) */
#define R_SETCURSOR 10 /* put (RECNO) */
#define R_RECNOSYNC 11 /* sync (RECNO) */
/*
 * 1.85-style access method selector.  Numbering starts at 0 here
 * (DB_BTREE == 0, DB_HASH == 1, DB_RECNO == 2).
 */
typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
/*
 * Access method description structure.
 *
 * A table of method pointers for one open database.  Every method takes
 * the handle itself as first argument; all but close() take it as a
 * pointer to const.  Flag arguments are u_int values.
 */
typedef struct __db {
DBTYPE type; /* Underlying db type. */
int (*close) __P((struct __db *));
int (*del) __P((const struct __db *, const DBT *, u_int));
int (*get) __P((const struct __db *, const DBT *, DBT *, u_int));
int (*put) __P((const struct __db *, DBT *, const DBT *, u_int));
int (*seq) __P((const struct __db *, DBT *, DBT *, u_int));
int (*sync) __P((const struct __db *, u_int));
void *internal; /* Access method private. */
int (*fd) __P((const struct __db *));
} DB;
/* Btree magic number and version for the 1.85-style interface. */
#define BTREEMAGIC 0x053162
#define BTREEVERSION 3
/*
 * Structure used to pass parameters to the btree routines.
 * R_DUP is defined directly above the flags member it is meant for;
 * as a macro it is nevertheless visible file-wide.
 */
typedef struct {
#define R_DUP 0x01 /* duplicate keys */
u_long flags;
u_int cachesize; /* bytes to cache */
int maxkeypage; /* maximum keys per page */
int minkeypage; /* minimum keys per page */
u_int psize; /* page size */
int (*compare) /* comparison function */
__P((const DBT *, const DBT *));
size_t (*prefix) /* prefix function */
__P((const DBT *, const DBT *));
int lorder; /* byte order */
} BTREEINFO;
/* Hash magic number and version for the 1.85-style interface. */
#define HASHMAGIC 0x061561
#define HASHVERSION 2
/*
 * Structure used to pass parameters to the hashing routines.
 * The hash callback receives a pointer and a size_t and returns a
 * 32-bit value.
 */
typedef struct {
u_int bsize; /* bucket size */
u_int ffactor; /* fill factor */
u_int nelem; /* number of elements */
u_int cachesize; /* bytes to cache */
u_int32_t /* hash function */
(*hash) __P((const void *, size_t));
int lorder; /* byte order */
} HASHINFO;
/*
 * Structure used to pass parameters to the record routines.
 * The R_* flag macros are defined directly above the flags member they
 * are meant for; each is a distinct single bit.
 */
typedef struct {
#define R_FIXEDLEN 0x01 /* fixed-length records */
#define R_NOKEY 0x02 /* key not required */
#define R_SNAPSHOT 0x04 /* snapshot the input */
u_long flags;
u_int cachesize; /* bytes to cache */
u_int psize; /* page size */
int lorder; /* byte order */
size_t reclen; /* record length (fixed-length records) */
u_char bval; /* delimiting byte (variable-length records) */
char *bfname; /* btree file name */
} RECNOINFO;
/*
 * The single 1.85-style entry point.  The last argument is an untyped
 * pointer; presumably it points to the BTREEINFO, HASHINFO or RECNOINFO
 * matching the DBTYPE argument -- TODO confirm against dbopen(3).
 *
 * The extern "C" block is closed with a plain '}' -- a trailing ';' is
 * an empty declaration that pre-C++11 compilers reject or warn about in
 * pedantic mode.
 */
#if defined(__cplusplus)
extern "C" {
#endif
DB *dbopen __P((const char *, int, int, DBTYPE, const void *));
#if defined(__cplusplus)
}
#endif
#endif /* !_DB_185_H_ */

87
db2/include/db_am.h Normal file
View File

@ -0,0 +1,87 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)db_am.h 10.5 (Sleepycat) 8/22/97
*/
#ifndef _DB_AM_H
#define _DB_AM_H
/*
 * Opcode values.  DB_ISBIG is the single low bit 0x01; the remaining
 * codes step through the high nibble in increments of 0x10.  Their bit
 * patterns overlap (0x30 == 0x10 | 0x20, for instance), so they cannot
 * be tested as independent bit masks.
 */
#define DB_ISBIG 0x01
#define DB_ADD_DUP 0x10
#define DB_REM_DUP 0x20
#define DB_ADD_BIG 0x30
#define DB_REM_BIG 0x40
#define DB_SPLITOLD 0x50
#define DB_SPLITNEW 0x60
/*
 * Standard initialization and shutdown macros for all recovery functions.
 *
 * Requires the following local variables:
 *
 *	DB *file_dbp, *mdbp;
 *	DB_MPOOLFILE *mpf;
 *	int ret;
 *
 * The expansion additionally reads logp and dbtp, writes argp, and jumps
 * to a label named "out", so the enclosing function must supply those
 * as well.
 *
 * REC_INTRO(func) performs, in order:
 *   1. func(dbtp->data, &argp); a non-zero result is left in ret and
 *	control jumps to "out".
 *   2. __db_fileid_to_db(logp, &mdbp, argp->fileid).  The result is
 *	stored in ret so that DB_DELETED can be recognised and turned
 *	into success (ret = 0), while every other failure reaches "out"
 *	with its error code intact.  (Testing the call without storing
 *	its result left ret at 0, so all lookup failures were reported
 *	as success.)
 *   3. If no handle was produced (mdbp == NULL), jump to "out" with
 *	ret == 0.
 *   4. For a DB_AM_THREAD handle, obtain a separate handle through
 *	__db_gethandle(); otherwise use mdbp directly.
 *   5. Set DB_AM_RECOVER on the chosen handle and cache its mpool file
 *	in mpf.
 */
#define REC_INTRO(func) { \
	file_dbp = mdbp = NULL; \
	if ((ret = func(dbtp->data, &argp)) != 0) \
		goto out; \
	if ((ret = __db_fileid_to_db(logp, &mdbp, argp->fileid)) != 0) { \
		if (ret == DB_DELETED) \
			ret = 0; \
		goto out; \
	} \
	if (mdbp == NULL) \
		goto out; \
	if (F_ISSET(mdbp, DB_AM_THREAD)) { \
		if ((ret = __db_gethandle(mdbp, \
		    mdbp->type == DB_HASH ? __ham_hdup : __bam_bdup, \
		    &file_dbp)) != 0) \
			goto out; \
	} else \
		file_dbp = mdbp; \
	F_SET(file_dbp, DB_AM_RECOVER); \
	mpf = file_dbp->mpf; \
}
/*
 * REC_CLOSE: common exit sequence for recovery functions.
 *
 * Frees argp when it is non-NULL, clears DB_AM_RECOVER on file_dbp (if
 * one was obtained) and hands a DB_AM_THREAD handle back through
 * __db_puthandle().  The expansion ends with "return (ret)", so nothing
 * placed after it in the function is reached.  Uses the caller's argp,
 * file_dbp and ret; argp must therefore hold NULL or a valid pointer on
 * every path that gets here.
 */
#define REC_CLOSE { \
if (argp != NULL) \
free (argp); \
if (file_dbp != NULL) { \
F_CLR(file_dbp, DB_AM_RECOVER); \
if (F_ISSET(file_dbp, DB_AM_THREAD)) \
__db_puthandle(file_dbp); \
} \
return (ret); \
}
/*
* No-op versions of the same macros.
*
* REC_NOOP_INTRO(func) only calls func(dbtp->data, &argp).  On a
* non-zero result it returns ret from the enclosing function directly
* rather than jumping to a label, so no cleanup code runs on that path.
*/
#define REC_NOOP_INTRO(func) { \
if ((ret = func(dbtp->data, &argp)) != 0) \
return (ret); \
}
/*
 * REC_NOOP_CLOSE: frees argp when it is non-NULL, then returns ret from
 * the enclosing function.  No DB handle is touched.
 */
#define REC_NOOP_CLOSE { \
if (argp != NULL) \
free (argp); \
return (ret); \
}
/*
* Standard debugging macro for all recovery functions.
*
* With DEBUG_RECOVER defined, REC_PRINT(func) calls
* func(logp, dbtp, lsnp, redo, info) and discards the result.  Without
* it, the macro expands to a self-assignment of info whose only purpose
* is to silence an unused-variable warning.  Both expansions already end
* in ';', so a use written as "REC_PRINT(f);" yields an extra empty
* statement.
*/
#ifdef DEBUG_RECOVER
#define REC_PRINT(func) \
(void)func(logp, dbtp, lsnp, redo, info);
#else
#define REC_PRINT(func) \
info = info; /* XXX: Shut the compiler up. */
#endif
#include "db_auto.h"
#include "db_ext.h"
#endif

118
db2/include/db_auto.h Normal file
View File

@ -0,0 +1,118 @@
/* Do not edit: automatically built by dist/db_gen.sh. */
#ifndef db_AUTO_H
#define db_AUTO_H
/*
 * Log record type DB_db_BEGIN + 1 and its argument structure.
 * Starts with the common type / txnid / prev_lsn header.  Note that the
 * struct tag has one leading underscore and the typedef name has two.
 */
#define DB_db_addrem (DB_db_BEGIN + 1)
typedef struct _db_addrem_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t opcode;
u_int32_t fileid;
db_pgno_t pgno;
u_int32_t indx;
size_t nbytes;
DBT hdr;
DBT dbt;
DB_LSN pagelsn;
} __db_addrem_args;
/*
 * Log record type DB_db_BEGIN + 2 and its argument structure.
 * Starts with the common type / txnid / prev_lsn header and carries a
 * page image as a DBT.
 */
#define DB_db_split (DB_db_BEGIN + 2)
typedef struct _db_split_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t opcode;
u_int32_t fileid;
db_pgno_t pgno;
DBT pageimage;
DB_LSN pagelsn;
} __db_split_args;
/*
 * Log record type DB_db_BEGIN + 3 and its argument structure.
 * Starts with the common type / txnid / prev_lsn header and records a
 * page together with its previous and next page numbers and the LSN of
 * each of the three.
 */
#define DB_db_big (DB_db_BEGIN + 3)
typedef struct _db_big_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t opcode;
u_int32_t fileid;
db_pgno_t pgno;
db_pgno_t prev_pgno;
db_pgno_t next_pgno;
DBT dbt;
DB_LSN pagelsn;
DB_LSN prevlsn;
DB_LSN nextlsn;
} __db_big_args;
/*
 * Log record type DB_db_BEGIN + 4 and its argument structure.
 * Starts with the common type / txnid / prev_lsn header; unlike the
 * first three record types it has no opcode member.
 */
#define DB_db_ovref (DB_db_BEGIN + 4)
typedef struct _db_ovref_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t pgno;
DB_LSN lsn;
} __db_ovref_args;
/*
 * Log record type DB_db_BEGIN + 5 and its argument structure.
 * Starts with the common type / txnid / prev_lsn header, then gives a
 * page number and LSN for the page itself, its "prev" and its "next".
 */
#define DB_db_relink (DB_db_BEGIN + 5)
typedef struct _db_relink_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t pgno;
DB_LSN lsn;
db_pgno_t prev;
DB_LSN lsn_prev;
db_pgno_t next;
DB_LSN lsn_next;
} __db_relink_args;
/*
 * Log record type DB_db_BEGIN + 6 and its argument structure.
 * Starts with the common type / txnid / prev_lsn header, then gives a
 * page number and LSN for the page and for its "next" page.
 */
#define DB_db_addpage (DB_db_BEGIN + 6)
typedef struct _db_addpage_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t pgno;
DB_LSN lsn;
db_pgno_t nextpgno;
DB_LSN nextlsn;
} __db_addpage_args;
/*
 * Log record type DB_db_BEGIN + 7 and its argument structure.
 * Starts with the common type / txnid / prev_lsn header; the payload is
 * three DBTs (op, key, data) plus a file id and a flags word.
 */
#define DB_db_debug (DB_db_BEGIN + 7)
typedef struct _db_debug_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
DBT op;
u_int32_t fileid;
DBT key;
DBT data;
u_int32_t arg_flags;
} __db_debug_args;
/*
 * Log record type DB_db_BEGIN + 8 and its argument structure.
 * Consists of the common type / txnid / prev_lsn header only.
 */
#define DB_db_noop (DB_db_BEGIN + 8)
typedef struct _db_noop_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
} __db_noop_args;
#endif

888
db2/include/db_cxx.h Normal file
View File

@ -0,0 +1,888 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1997
* Sleepycat Software. All rights reserved.
*
* @(#)db_cxx.h 10.7 (Sleepycat) 8/22/97
*/
#ifndef _DB_CXX_H_
#define _DB_CXX_H_
//
// C++ assumptions:
//
// To ensure portability to many platforms, both new and old, we make
// few assumptions about the C++ compiler and library. For example,
// we do not expect STL, templates or namespaces to be available. The
// "newest" C++ feature used is exceptions, which are used liberally
// to transmit error information. Even the use of exceptions can be
// disabled at runtime, see setErrorModel().
//
// C++ naming conventions:
//
// - All top level class names start with Db.
// - All class members start with lower case letter.
// - All private data members are suffixed with underscore.
// - Use underscores to divide names into multiple words.
// - Simple data accessors are named with get_ or set_ prefix.
// - All method names are taken from names of functions in the C
// layer of db (usually by dropping a prefix like "db_").
// These methods have the same argument types and order,
// other than dropping the explicit arg that acts as "this".
//
// As a rule, each DbFoo object has exactly one underlying DB_FOO struct
// (defined in db.h) associated with it. In many cases, we inherit directly
// from the DB_FOO structure to make this relationship explicit. Often,
// the underlying C layer allocates and deallocates these structures, so
// there is no easy way to add any data to the DbFoo class. When you see
// a comment about whether data is permitted to be added, this is what
// is going on. Of course, if we need to add data to such C++ classes
// in the future, we will arrange to have an indirect pointer to the
// DB_FOO struct (as some of the classes already have).
//
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// Forward declarations
//
#include "db.h"
class Db; // forward
class Dbc; // forward
class DbEnv; // forward
class DbException; // forward
class DbInfo; // forward
class DbLock; // forward
class DbLockTab; // forward
class DbLog; // forward
class DbLsn; // forward
class DbMpool; // forward
class DbMpoolFile; // forward
class Dbt; // forward
class DbTxn; // forward
class DbTxnMgr; // forward
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// Mechanisms for declaring classes
//
//
// Every class defined in this file has an _exported next to the class name.
// This is needed for WinTel machines so that the class methods can
// be exported or imported in a DLL as appropriate. Users of the DLL
// use the define DB_USE_DLL. When the DLL is built, DB_CREATE_DLL
// must be defined.
//
// _exported expands to a DLL export/import attribute only under MSVC:
//   DB_CREATE_DLL defined -> __declspec(dllexport)
//   DB_USE_DLL defined    -> __declspec(dllimport)
//   neither               -> nothing
// DB_CREATE_DLL is tested first, so it wins if both are defined.
// On every other compiler _exported is always empty.
#if defined(_MSC_VER)
# if defined(DB_CREATE_DLL)
# define _exported __declspec(dllexport) // creator of dll
# elif defined(DB_USE_DLL)
# define _exported __declspec(dllimport) // user of dll
# else
# define _exported // static lib creator or user
# endif
#else
# define _exported
#endif
// DEFINE_DB_CLASS defines an imp_ data member and imp() accessor.
// The underlying type is a pointer to an opaque *Imp class, that
// gets converted to the correct implementation class by the implementation.
//
// Since these defines use "private/public" labels, and leave the access
// being "private", we always use these by convention before any data
// members in the private section of a class. Keeping them in the
// private section also emphasizes that they are off limits to user code.
//
// The macro pastes "Imp" onto its argument to form the implementation
// class name and declares both a const and a non-const imp() overload.
// Its last line has no terminating semicolon; the use site supplies it.
//
#define DEFINE_DB_CLASS(name) \
public: class name##Imp* imp() { return imp_; } \
public: const class name##Imp* imp() const { return imp_; } \
private: class name##Imp* imp_
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// Turn off inappropriate compiler warnings
//
// MSVC only: silence two level 4 warnings for code that includes this
// header.  No matching re-enable is visible in this part of the file,
// so the setting presumably stays in effect for the rest of the
// including translation unit.
#ifdef _MSC_VER
// These are level 4 warnings that are explicitly disabled.
// With Visual C++, by default you do not see above level 3 unless
// you use /W4. But we like to compile with the highest level
// warnings to catch other errors.
//
// 4201: nameless struct/union
// triggered by standard include file <winnt.h>
//
// 4514: unreferenced inline function has been removed
// certain include files in MSVC define methods that are not called
//
#pragma warning(disable: 4201 4514)
#endif
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// Exception classes
//
// Almost any error in the DB library throws a DbException.
// Every exception should be considered an abnormality
// (e.g. bug, misuse of DB, file system error).
//
// NOTE: We would like to inherit from class exception and
// let it handle what(), but there are
// MSVC++ problems when <exception> is included.
//
// Exception type for errors detected by the DB library.
// An instance holds a message string (what_) and an error number (err_).
class _exported DbException
{
public:
virtual ~DbException();
// Construct from an error number, from a description string, or from an
// error number preceded by one or two prefix strings.
DbException(int err);
DbException(const char *description);
DbException(const char *prefix, int err);
DbException(const char *prefix1, const char *prefix2, int err);
// NOTE(review): "const" here qualifies the by-value int return type and so
// has no effect; the member function itself is not const-qualified and
// cannot be called on a const DbException. Changing this requires a
// matching change in the implementation file.
const int get_errno();
virtual const char *what() const;
// Copy construction and assignment are public.
DbException(const DbException &);
DbException &operator = (const DbException &);
private:
char *what_; // message text; presumably what what() returns - confirm in the implementation
int err_; // errno
};
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// Lock classes
//
//
// A single lock. The class wraps one DB_LOCK value (lock_) and is
// freely copyable.
//
class _exported DbLock
{
	// A friend declaration must name the class with an elaborated type
	// specifier ("friend class X;"); the bare "friend X;" form is only
	// accepted from C++11 on.
	friend class DbLockTab;

public:
	DbLock(unsigned int);
	DbLock();

	// Read or replace the wrapped lock id.
	unsigned int get_lock_id();
	void set_lock_id(unsigned int);

	// Hand this lock back to the given lock table.
	// NOTE(review): presumably the counterpart of the C lock_put() - confirm.
	int put(DbLockTab *locktab);

	// Copying is allowed.
	DbLock(const DbLock &);
	DbLock &operator = (const DbLock &);

protected:
	// We can add data to this class if needed
	// since its contained class is not allocated by db.
	// (see comment at top)

	DB_LOCK lock_;
};
//
// Handle on a lock table.
// Construction, destruction and copying are private: obtain instances
// through DbLockTab::open() or DbEnv::get_lk_info() and release them
// with close().
//
class _exported DbLockTab
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class DbEnv;

public:
	int close();
	int detect(int atype, u_int32_t flags);
	int get(u_int32_t locker, int flags, const Dbt *obj,
		db_lockmode_t lock_mode, DbLock *lock);
	int id(u_int32_t *idp);
	int vec(u_int32_t locker, int flags, DB_LOCKREQ list[],
		int nlist, DB_LOCKREQ **elistp);

	// Create or remove new locktab files
	//
	static int open(const char *dir, int flags, int mode,
		DbEnv* dbenv, DbLockTab **regionp);
	static int unlink(const char *dir, int force, DbEnv* dbenv);

private:
	// We can add data to this class if needed
	// since it is implemented via a pointer.
	// (see comment at top)

	// copying not allowed
	//
	DbLockTab(const DbLockTab &);
	DbLockTab &operator = (const DbLockTab &);

	// Note: use DbLockTab::open() or DbEnv::get_lk_info()
	// to get pointers to a DbLockTab,
	// and call DbLockTab::close() rather than delete to release them.
	//
	DbLockTab();
	~DbLockTab();

	// Declares imp_ and the imp() accessors; must stay last because it
	// leaves the access level at private.
	DEFINE_DB_CLASS(DbLockTab);
};
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// Log classes
//
//
// A log sequence number. The class adds nothing to DB_LSN; the base is
// inherited as protected, so only the friends below can convert a
// DbLsn to its DB_LSN base.
//
class _exported DbLsn : protected DB_LSN
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class DbLog;		// friendship needed to cast to base class
	friend class DbMpool;
};
//
// Handle on a log.
// Construction, destruction and copying are private: obtain instances
// through DbLog::open() or DbEnv::get_lg_info() and release them with
// close().
//
class _exported DbLog
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class DbEnv;

public:
	int archive(char **list[], int flags, void *(*db_malloc)(size_t));
	int close();
	static int compare(const DbLsn *lsn0, const DbLsn *lsn1);
	int file(DbLsn *lsn, char *namep, int len);
	int flush(const DbLsn *lsn);
	int get(DbLsn *lsn, Dbt *data, int flags);
	int put(DbLsn *lsn, const Dbt *data, int flags);

	// Normally these would be called register and unregister to
	// parallel the C interface, but "register" is a reserved word.
	//
	int db_register(Db *dbp, const char *name, u_int32_t *fidp);
	int db_unregister(u_int32_t fid);

	// Create or remove new log files
	//
	static int open(const char *dir, int flags, int mode,
		DbEnv* dbenv, DbLog **regionp);
	static int unlink(const char *dir, int force, DbEnv* dbenv);

private:
	// We can add data to this class if needed
	// since it is implemented via a pointer.
	// (see comment at top)

	// Note: use DbLog::open() or DbEnv::get_lg_info()
	// to get pointers to a DbLog,
	// and call DbLog::close() rather than delete to release them.
	//
	DbLog();
	~DbLog();

	// no copying
	// (operator = is given an explicit return type: C++ has no
	// implicit int, and this matches DbLockTab and DbMpool.)
	DbLog(const DbLog &);
	DbLog &operator = (const DbLog &);

	// Declares imp_ and the imp() accessors; must stay last because it
	// leaves the access level at private.
	DEFINE_DB_CLASS(DbLog);
};
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// Memory pool classes
//
//
// Handle on one file in a memory pool.
// Obtain instances through DbMpoolFile::open() and release them with
// close(); the constructors and copy operations are private and the
// destructor is protected.
//
class _exported DbMpoolFile
{
public:
	int close();
	int get(db_pgno_t *pgnoaddr, unsigned long flags, void *pagep);
	int put(void *pgaddr, unsigned long flags);
	int set(void *pgaddr, unsigned long flags);
	int sync();

	static int open(DbMpool *mp, const char *file,
		int ftype, int flags, int mode,
		size_t pagesize, int lsn_offset,
		Dbt *pgcookie, u_int8_t *uid, DbMpoolFile **mpf);

private:
	// We can add data to this class if needed
	// since it is implemented via a pointer.
	// (see comment at top)

	// Note: use DbMpoolFile::open()
	// to get pointers to a DbMpoolFile,
	// and call DbMpoolFile::close() rather than delete to release them.
	//
	DbMpoolFile();

	// Shut g++ up.
	// NOTE(review): presumably this avoids g++'s warning about a class
	// whose constructors and destructor are all private and which has
	// no friends - confirm before changing the access level.
protected:
	~DbMpoolFile();

private:
	// no copying
	// (operator = is given an explicit return type: C++ has no
	// implicit int, and this matches DbLockTab and DbMpool.)
	DbMpoolFile(const DbMpoolFile &);
	DbMpoolFile &operator = (const DbMpoolFile &);

	// Declares imp_ and the imp() accessors; must stay last because it
	// leaves the access level at private.
	DEFINE_DB_CLASS(DbMpoolFile);
};
//
// Handle on a memory pool.
// Construction, destruction and copying are private: obtain instances
// through DbMpool::open() or DbEnv::get_mp_info() and release them with
// close().
//
class _exported DbMpool
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class DbEnv;

public:
	int close();

	// access to low level interface
	// Normally this would be called register to parallel
	// the C interface, but "register" is a reserved word.
	//
	int db_register(int ftype,
		int (*pgin)(db_pgno_t pgno, void *pgaddr, DBT *pgcookie),
		int (*pgout)(db_pgno_t pgno, void *pgaddr, DBT *pgcookie));

	int stat(DB_MPOOL_STAT **gsp, DB_MPOOL_FSTAT ***fsp,
		void *(*db_malloc)(size_t));
	int sync(DbLsn *lsn);

	// Create or remove new mpool files
	//
	static int open(const char *dir, int flags, int mode,
		DbEnv* dbenv, DbMpool **regionp);
	static int unlink(const char *dir, int force, DbEnv* dbenv);

private:
	// We can add data to this class if needed
	// since it is implemented via a pointer.
	// (see comment at top)

	// Note: use DbMpool::open() or DbEnv::get_mp_info()
	// to get pointers to a DbMpool,
	// and call DbMpool::close() rather than delete to release them.
	//
	DbMpool();
	~DbMpool();

	// no copying
	DbMpool(const DbMpool &);
	DbMpool &operator = (const DbMpool &);

	// Declares imp_ and the imp() accessors; must stay last because it
	// leaves the access level at private.
	DEFINE_DB_CLASS(DbMpool);
};
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// Transaction classes
//
//
// Handle on a transaction manager.
// Construction, destruction and copying are private: obtain instances
// through DbTxnMgr::open() or DbEnv::get_tx_info() and release them
// with close().
//
class _exported DbTxnMgr
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class DbEnv;

public:
	// Start a transaction; the new DbTxn is returned through tid.
	// NOTE(review): pid is presumably the parent transaction - confirm.
	int begin(DbTxn *pid, DbTxn **tid);
	int checkpoint(long kbyte, long min) const;
	int close();
	int stat(DB_TXN_STAT **statp, void *(*db_malloc)(size_t));

	// Create or remove new txnmgr files
	//
	static int open(const char *dir, int flags, int mode,
		DbEnv* dbenv, DbTxnMgr **regionp);
	static int unlink(const char *dir, int force, DbEnv* dbenv);

private:
	// We can add data to this class if needed
	// since it is implemented via a pointer.
	// (see comment at top)

	// Note: use DbTxnMgr::open() or DbEnv::get_tx_info()
	// to get pointers to a DbTxnMgr,
	// and call DbTxnMgr::close() rather than delete to release them.
	//
	DbTxnMgr();
	~DbTxnMgr();

	// no copying
	// (operator = is given an explicit return type: C++ has no
	// implicit int, and this matches DbLockTab and DbMpool.)
	DbTxnMgr(const DbTxnMgr &);
	DbTxnMgr &operator = (const DbTxnMgr &);

	// Declares imp_ and the imp() accessors; must stay last because it
	// leaves the access level at private.
	DEFINE_DB_CLASS(DbTxnMgr);
};
//
// Handle on one transaction.
// Construction, destruction and copying are private: obtain instances
// through DbTxnMgr::begin() and release them with abort() or commit().
//
class _exported DbTxn
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class DbTxnMgr;

public:
	int abort();
	int commit();
	u_int32_t id();
	int prepare();

private:
	// We can add data to this class if needed
	// since it is implemented via a pointer.
	// (see comment at top)

	// Note: use DbTxnMgr::begin() to get pointers to a DbTxn,
	// and call DbTxn::abort() or DbTxn::commit rather than
	// delete to release them.
	//
	DbTxn();
	~DbTxn();

	// no copying
	// (operator = is given an explicit return type: C++ has no
	// implicit int, and this matches DbLockTab and DbMpool.)
	DbTxn(const DbTxn &);
	DbTxn &operator = (const DbTxn &);

	// Declares imp_ and the imp() accessors; must stay last because it
	// leaves the access level at private.
	DEFINE_DB_CLASS(DbTxn);
};
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// Application classes
//
//
// A set of application options - define how this application uses
// the db library.
//
//
// Per-database options, exposed as get_/set_ pairs over the DB_INFO
// base, which is inherited as protected. Objects are publicly
// constructible and (deep) copyable.
//
class _exported DbInfo : protected DB_INFO
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class DbEnv;
	friend class Db;

public:
	DbInfo();
	~DbInfo();

	// Byte order.
	int get_lorder() const;
	void set_lorder(int);

	// Underlying cache size.
	size_t get_cachesize() const;
	void set_cachesize(size_t);

	// Underlying page size.
	size_t get_pagesize() const;
	void set_pagesize(size_t);

	// Local heap allocation.
	typedef void *(*db_malloc_fcn)(size_t);
	db_malloc_fcn get_malloc() const;
	void set_malloc(db_malloc_fcn);

	////////////////////////////////////////////////////////////////
	// Btree access method.

	// Maximum keys per page.
	int get_bt_maxkey() const;
	void set_bt_maxkey(int);

	// Minimum keys per page.
	int get_bt_minkey() const;
	void set_bt_minkey(int);

	// Comparison function.
	typedef int (*bt_compare_fcn)(const DBT *, const DBT *);
	bt_compare_fcn get_bt_compare() const;
	void set_bt_compare(bt_compare_fcn);

	// Prefix function.
	typedef size_t (*bt_prefix_fcn)(const DBT *, const DBT *);
	bt_prefix_fcn get_bt_prefix() const;
	void set_bt_prefix(bt_prefix_fcn);

	////////////////////////////////////////////////////////////////
	// Hash access method.

	// Fill factor.
	unsigned int get_h_ffactor() const;
	void set_h_ffactor(unsigned int);

	// Number of elements.
	unsigned int get_h_nelem() const;
	void set_h_nelem(unsigned int);

	// Hash function.
	typedef u_int32_t (*h_hash_fcn)(const void *, u_int32_t);
	h_hash_fcn get_h_hash() const;
	void set_h_hash(h_hash_fcn);

	////////////////////////////////////////////////////////////////
	// Recno access method.

	// Fixed-length padding byte.
	int get_re_pad() const;
	void set_re_pad(int);

	// Variable-length delimiting byte.
	int get_re_delim() const;
	void set_re_delim(int);

	// Length for fixed-length records.
	u_int32_t get_re_len() const;
	void set_re_len(u_int32_t);

	// Source file name.
	char *get_re_source() const;
	void set_re_source(char *);

	// Note: some flags are set as side effects of calling
	// above "set" methods.
	//
	u_int32_t get_flags() const;
	void set_flags(u_int32_t);

	// (deep) copying of this object is allowed.
	//
	DbInfo(const DbInfo &);
	DbInfo &operator = (const DbInfo &);

private:
	// We can add data to this class if needed
	// since parent class is not allocated by db.
	// (see comment at top)
};
//
// Base application class. Provides functions for opening a database.
// User of this library can use this class as a starting point for
// developing a DB application - derive their application class from
// this one, add application control logic.
//
// Note that if you use the default constructor, you must explicitly
// call appinit() before any other db activity (e.g. opening files)
//
//
// Application environment, exposed as get_/set_ pairs over the DB_ENV
// base, which is inherited as protected. Either construct with
// arguments to initialize immediately, or default-construct, call the
// set_ methods, and then call appinit(). Copying is not allowed.
//
class _exported DbEnv : protected DB_ENV
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class DbTxnMgr;
	friend class DbLog;
	friend class DbLockTab;
	friend class DbMpool;
	friend class Db;

public:

	~DbEnv();

	// This constructor can be used to immediately initialize the
	// application with these arguments. Do not use it if you
	// need to set other parameters via the access methods.
	//
	DbEnv(const char *homeDir, char *const *db_config, int flags);

	// Use this constructor if you wish to *delay* the initialization
	// of the db library. This is useful if you need to set
	// any particular parameters via the access methods below.
	// Then call appinit() to complete the initialization.
	//
	DbEnv();

	// Used in conjunction with the default constructor to
	// complete the initialization of the db library.
	//
	int appinit(const char *homeDir, char *const *db_config, int flags);

	////////////////////////////////////////////////////////////////
	// simple get/set access methods
	//
	// If you are calling set_ methods, you need to
	// use the default constructor along with appinit().

	// Byte order.
	int get_lorder() const;
	void set_lorder(int);

	// Error message callback.
	typedef void (*db_errcall_fcn)(const char *, char *);
	db_errcall_fcn get_errcall() const;
	void set_errcall(db_errcall_fcn);

	// Error message file stream.
	FILE *get_errfile() const;
	void set_errfile(FILE *);

	// Error message prefix.
	const char *get_errpfx() const;
	void set_errpfx(const char *);

	// Generate debugging messages.
	int get_verbose() const;
	void set_verbose(int);

	////////////////////////////////////////////////////////////////
	// User paths.

	// Database home.
	char *get_home() const;
	void set_home(char *);

	// Database log file directory.
	char *get_log_dir() const;
	void set_log_dir(char *);

	// Database tmp file directory.
	char *get_tmp_dir() const;
	void set_tmp_dir(char *);

	// Database data file directories.
	char **get_data_dir() const;
	void set_data_dir(char **);

	// Database data file slots.
	int get_data_cnt() const;
	void set_data_cnt(int);

	// Next Database data file slot.
	int get_data_next() const;
	void set_data_next(int);

	////////////////////////////////////////////////////////////////
	// Locking.

	// Return from lock_open().
	DbLockTab *get_lk_info() const;

	// Two dimensional conflict matrix.
	u_int8_t *get_lk_conflicts() const;
	void set_lk_conflicts(u_int8_t *);

	// Number of lock modes in table.
	int get_lk_modes() const;
	void set_lk_modes(int);

	// Maximum number of locks.
	unsigned int get_lk_max() const;
	void set_lk_max(unsigned int);

	// Deadlock detect on every conflict.
	u_int32_t get_lk_detect() const;
	void set_lk_detect(u_int32_t);

	// Yield function for threads.
	typedef int (*db_yield_fcn) (void);
	db_yield_fcn get_yield() const;
	void set_yield(db_yield_fcn);

	////////////////////////////////////////////////////////////////
	// Logging.

	// Return from log_open().
	DbLog *get_lg_info() const;

	// Maximum file size.
	u_int32_t get_lg_max() const;
	void set_lg_max(u_int32_t);

	////////////////////////////////////////////////////////////////
	// Memory pool.

	// Return from memp_open().
	DbMpool *get_mp_info() const;

	// Maximum file size for mmap.
	size_t get_mp_mmapsize() const;
	void set_mp_mmapsize(size_t);

	// Bytes in the mpool cache.
	size_t get_mp_size() const;
	void set_mp_size(size_t);

	////////////////////////////////////////////////////////////////
	// Transactions.

	// Return from txn_open().
	DbTxnMgr *get_tx_info() const;

	// Maximum number of transactions.
	unsigned int get_tx_max() const;
	void set_tx_max(unsigned int);

	// Dispatch function for recovery.
	typedef int (*tx_recover_fcn)(DB_LOG *, DBT *, DB_LSN *, int, void *);
	tx_recover_fcn get_tx_recover() const;
	void set_tx_recover(tx_recover_fcn);

	// Flags.
	u_int32_t get_flags() const;
	void set_flags(u_int32_t);

	////////////////////////////////////////////////////////////////
	// The default error model is to throw an exception whenever
	// an error occurs. This generally allows for cleaner logic
	// for transaction processing, as a try block can surround a
	// single transaction. Alternatively, since almost every method
	// returns an error code (errno), the error model can be set to
	// not throw exceptions, and instead return the appropriate code.
	//
	enum ErrorModel { Exception, ErrorReturn };
	void set_error_model(ErrorModel);
	ErrorModel get_error_model() const;

	// If an error is detected and the error call function
	// or stream is set, a message is dispatched or printed.
	// If a prefix is set, each message is prefixed.
	//
	// You can use set_errcall() or set_errfile() above to control
	// error functionality using a C model. Alternatively, you can
	// call set_error_stream() to force all errors to a C++ stream.
	// It is unwise to mix these approaches.
	//
	class ostream* get_error_stream() const;
	void set_error_stream(class ostream*);

	// used internally
	static int runtime_error(const char *caller, int err, int in_destructor = 0);

private:
	// We can add data to this class if needed
	// since parent class is not allocated by db.
	// (see comment at top)

	// no copying
	// (operator = is given an explicit return type: C++ has no
	// implicit int, and this matches DbLockTab and DbMpool.)
	DbEnv(const DbEnv &);
	DbEnv &operator = (const DbEnv &);

	ErrorModel error_model_;

	// Has the same signature as db_errcall_fcn.
	static void stream_error_function(const char *, char *);

	// Static, so the error stream is shared by every DbEnv object.
	static ostream *error_stream_;
};
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
//
// Table access classes
//
//
// Represents a database table = a set of keys with associated values.
//
//
// Handle on one database table.
// Construction, destruction and copying are private: obtain instances
// through Db::open() and release them with close().
//
class _exported Db
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class DbEnv;

public:
	int close(int flags);
	int cursor(DbTxn *txnid, Dbc **cursorp);
	int del(Dbt *key, DbTxn *txnid);
	int fd(int *fdp);
	int get(DbTxn *txnid, Dbt *key, Dbt *data, int flags);
	int put(DbTxn *txnid, Dbt *key, Dbt *data, int flags);
	int stat(void *sp, void *(*db_malloc)(size_t), int flags);
	int sync(int flags);

	DBTYPE get_type() const;

	static int open(const char *fname, DBTYPE type, int flags,
		int mode, DbEnv *dbenv, DbInfo *info, Db **dbpp);

private:
	// We can add data to this class if needed
	// since it is implemented via a pointer.
	// (see comment at top)

	// Note: use Db::open() to get initialized pointers to a Db,
	// and call Db::close() rather than delete to release them.
	Db();
	~Db();

	// no copying
	Db(const Db &);
	Db &operator = (const Db &);

	// Declares imp_ and the imp() accessors; must stay last because it
	// leaves the access level at private.
	DEFINE_DB_CLASS(Db);
};
//
// A chunk of data, maybe a key or value.
//
//
// A key or data item, exposed as get_/set_ pairs over the DBT base.
// The base is inherited as private, so only the friends below can
// convert a Dbt to its DBT base. Objects are publicly constructible
// and copyable.
//
class _exported Dbt : private DBT
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class Dbc;
	friend class Db;
	friend class DbLog;
	friend class DbMpoolFile;
	friend class DbLockTab;

public:

	// key/data
	void *get_data() const;
	void set_data(void *);

	// key/data length
	u_int32_t get_size() const;
	void set_size(u_int32_t);

	// RO: length of user buffer.
	u_int32_t get_ulen() const;
	void set_ulen(u_int32_t);

	// RO: get/put record length.
	u_int32_t get_dlen() const;
	void set_dlen(u_int32_t);

	// RO: get/put record offset.
	u_int32_t get_doff() const;
	void set_doff(u_int32_t);

	// flags
	u_int32_t get_flags() const;
	void set_flags(u_int32_t);

	Dbt(void *data, size_t size);
	Dbt();
	~Dbt();

	// Copying is allowed.
	Dbt(const Dbt &);
	Dbt &operator = (const Dbt &);

private:
	// We can add data to this class if needed
	// since parent class is not allocated by db.
	// (see comment at top)
};
//
// A cursor over a Db, layered directly on the DBC base (inherited as
// protected). Construction, destruction and copying are private:
// obtain instances through Db::cursor() and release them with close().
//
class _exported Dbc : protected DBC
{
	// "friend class X;" rather than "friend X;": the elaborated type
	// specifier is required by pre-C++11 compilers.
	friend class Db;

public:
	int close();
	int del(int flags);
	int get(Dbt* key, Dbt *data, int flags);
	int put(Dbt* key, Dbt *data, int flags);

private:
	// No data is permitted in this class (see comment at top)

	// Note: use Db::cursor() to get pointers to a Dbc,
	// and call Dbc::close() rather than delete to release them.
	//
	Dbc();
	~Dbc();

	// no copying
	Dbc(const Dbc &);
	Dbc &operator = (const Dbc &);
};
#endif /* !_DB_CXX_H_ */

73
db2/include/db_dispatch.h Normal file
View File

@ -0,0 +1,73 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
* The President and Fellows of Harvard University. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)db_dispatch.h 10.1 (Sleepycat) 4/12/97
*/
#ifndef _DB_DISPATCH_H
#define _DB_DISPATCH_H
/*
 * List of transaction IDs used during recovery: a queue.h LIST whose head
 * lives in __db_txnhead and whose elements are __db_txnlist entries.
 */

/* List head, plus a transaction ID bound (maxid). */
struct __db_txnhead {
	LIST_HEAD(__db_headlink, _db_txnlist) head;
	u_int32_t maxid;
};
typedef struct __db_txnhead __db_txnhead;

/* One list element: the list linkage and a single transaction ID. */
struct _db_txnlist {
	LIST_ENTRY(_db_txnlist) links;
	u_int32_t txnid;
};
typedef struct _db_txnlist __db_txnlist;
/*
 * Per-subsystem base numbers.
 * NOTE(review): presumably the first log record type assigned to each
 * subsystem - confirm against the *_auto.c files.
 */
#define DB_log_BEGIN 0
#define DB_txn_BEGIN 5
#define DB_ham_BEGIN 20
#define DB_db_BEGIN 40
#define DB_bam_BEGIN 50
#define DB_ram_BEGIN 100
#define DB_user_BEGIN 150

/*
 * Recovery operation codes. The negative values are parenthesized so the
 * macros stay a single operand wherever they are expanded.
 */
#define TXN_UNDO 0
#define TXN_REDO 1
#define TXN_BACKWARD_ROLL (-1)
#define TXN_FORWARD_ROLL (-2)
#define TXN_OPENFILES (-3)
#endif

114
db2/include/db_ext.h Normal file
View File

@ -0,0 +1,114 @@
/* Do not edit: automatically built by dist/distrib. */
/*
 * Internal prototypes. Every parameter list is wrapped in __P(()) so the
 * declarations can also be compiled without prototypes.
 */
int __db_pgerr __P((DB *, db_pgno_t));
int __db_pgfmt __P((DB *, db_pgno_t));
/*
 * One _log/_print/_read triple per record type (addrem, split, big, ovref,
 * relink, addpage, debug, noop). Every *_print routine takes
 * (DB_LOG *, DBT *, DB_LSN *, int, void *); every *_read routine takes a
 * void * and a pointer to a pointer to the matching __db_*_args structure.
 */
int __db_addrem_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, u_int32_t, db_pgno_t, u_int32_t,
size_t, DBT *, DBT *, DB_LSN *));
int __db_addrem_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_addrem_read __P((void *, __db_addrem_args **));
int __db_split_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, u_int32_t, db_pgno_t, DBT *,
DB_LSN *));
int __db_split_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_split_read __P((void *, __db_split_args **));
int __db_big_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, u_int32_t, db_pgno_t, db_pgno_t,
db_pgno_t, DBT *, DB_LSN *, DB_LSN *,
DB_LSN *));
int __db_big_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_big_read __P((void *, __db_big_args **));
int __db_ovref_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, DB_LSN *));
int __db_ovref_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_ovref_read __P((void *, __db_ovref_args **));
int __db_relink_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
DB_LSN *, db_pgno_t, DB_LSN *));
int __db_relink_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_relink_read __P((void *, __db_relink_args **));
int __db_addpage_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
DB_LSN *));
int __db_addpage_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_addpage_read __P((void *, __db_addpage_args **));
int __db_debug_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
DBT *, u_int32_t, DBT *, DBT *,
u_int32_t));
int __db_debug_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_debug_read __P((void *, __db_debug_args **));
int __db_noop_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t));
int __db_noop_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_noop_read __P((void *, __db_noop_args **));
int __db_init_print __P((DB_ENV *));
int __db_init_recover __P((DB_ENV *));
/* Page-in/page-out routines, taking a page number and a page address. */
int __db_pgin __P((db_pgno_t, void *));
int __db_pgout __P((db_pgno_t, void *));
/*
 * Dispatch. The callback accepted by __db_add_recovery has the same
 * signature as __db_dispatch and as the *_print and *_recover routines.
 */
int __db_dispatch __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_add_recovery __P((DB_ENV *,
int (*)(DB_LOG *, DBT *, DB_LSN *, int, void *), u_int32_t));
/*
 * Transaction ID list.
 * NOTE(review): the void * argument is presumably the __db_txnhead declared
 * in db_dispatch.h - confirm.
 */
int __db_txnlist_init __P((void *));
int __db_txnlist_add __P((void *, u_int32_t));
int __db_txnlist_find __P((void *, u_int32_t));
/* Page and item routines; several take caller-supplied page callbacks. */
int __db_dput __P((DB *,
DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
int __db_drem __P((DB *,
PAGE **, u_int32_t, int (*)(DB *, PAGE *)));
int __db_dend __P((DB *, db_pgno_t, PAGE **));
int __db_ditem __P((DB *, PAGE *, int, u_int32_t));
int __db_pitem
__P((DB *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
int __db_relink __P((DB *, PAGE *, PAGE **, int));
int __db_ddup __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
/* The *off routines. NOTE(review): presumably overflow-page handling - confirm. */
int __db_goff __P((DB *, DBT *,
u_int32_t, db_pgno_t, void **, u_int32_t *));
int __db_poff __P((DB *, const DBT *, db_pgno_t *,
int (*)(DB *, u_int32_t, PAGE **)));
int __db_ioff __P((DB *, db_pgno_t));
int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
int __db_moff __P((DB *, const DBT *, db_pgno_t));
/* The __db_pr* family. NOTE(review): presumably debugging print routines - confirm. */
void __db_loadme __P((void));
FILE *__db_prinit __P((FILE *));
int __db_dump __P((DB *, char *, int));
int __db_prdb __P((DB *));
int __db_prbtree __P((DB *));
int __db_prhash __P((DB *));
int __db_prtree __P((DB_MPOOLFILE *, int));
int __db_prnpage __P((DB_MPOOLFILE *, db_pgno_t));
int __db_prpage __P((PAGE *, int));
int __db_isbad __P((PAGE *, int));
void __db_pr __P((u_int8_t *, u_int32_t));
void __db_prflags __P((u_int32_t, const FN *));
/* One *_recover routine per record type, all with the dispatch signature. */
int __db_addrem_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_split_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_big_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_ovref_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_relink_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_addpage_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_debug_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __db_noop_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/* Return routines; __db_retcopy also takes an allocation function. */
int __db_ret __P((DB *,
PAGE *, u_int32_t, DBT *, void **, u_int32_t *));
int __db_retcopy __P((DBT *,
void *, u_int32_t, void **, u_int32_t *, void *(*)(size_t)));
/* DB handle get/put. */
int __db_gethandle __P((DB *, int (*)(DB *, DB *), DB **));
int __db_puthandle __P((DB *));

332
db2/include/db_int.h.src Normal file
View File

@ -0,0 +1,332 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)db_int.h.src 10.28 (Sleepycat) 8/20/97
*/
#ifndef _DB_INTERNAL_H_
#define _DB_INTERNAL_H_
#include <stddef.h> /* offsetof, used by SSZ/SSZA. */

#include "db.h" /* Standard DB include file. */
#include "queue.h"
#include "os_ext.h"
/*******************************************************
* General purpose constants and macros.
*******************************************************/
#define UINT32_T_MAX 0xffffffff /* Maximum 32 bit unsigned. */
#define UINT16_T_MAX 0xffff /* Maximum 16 bit unsigned. */
#define DB_MIN_PGSIZE 0x000200 /* Minimum page size (512 bytes). */
#define DB_MAX_PGSIZE 0x010000 /* Maximum page size (64KB). */
#define DB_MINCACHE 10 /* Minimum cached pages */
/*
* Aligning items to particular sizes or in pages or memory. ALIGNP is a
* separate macro, as we've had to cast the pointer to different integral
* types on different architectures.
*
* We cast pointers into unsigned longs when manipulating them because C89
* guarantees that u_long is the largest available integral type and, further,
* that unsigned arithmetic never overflows. However, neither C89 nor C9X
* requires that any integer type be large enough to hold a pointer, although
* C9X created the intptr_t type, which is guaranteed to hold a pointer but
* may or may not exist. At some point in the future, we should test for
* intptr_t and use it where available.
*/
/*
 * ALIGN(value, bound) rounds value up to the next multiple of bound; the
 * mask arithmetic is only correct when bound is a power of two.
 *
 * ALIGNP(value, bound) does the same for a pointer, converting it to
 * ALIGNTYPE first and yielding an ALIGNTYPE. Both arguments are cast as a
 * whole:
 *  - value, so that an argument such as "p + 1" is converted after the
 *    pointer arithmetic, not before it;
 *  - bound, so that the complemented mask is as wide as the address even
 *    when bound is a narrower unsigned type.
 */
#undef ALIGNTYPE
#define ALIGNTYPE u_long
#undef ALIGNP
#define ALIGNP(value, bound) \
	ALIGN((ALIGNTYPE)(value), (ALIGNTYPE)(bound))
#undef ALIGN
#define ALIGN(value, bound) (((value) + (bound) - 1) & ~((bound) - 1))
/*
* There are several on-page structures that are declared to have a number of
* fields followed by a variable length array of items. The structure size
* without including the variable length array or the address of the first of
* those elements can be found using SSZ.
*
* This macro can also be used to find the offset of a structure element in a
* structure. This is used in various places to copy structure elements from
* unaligned memory references, e.g., pointers into a packed page.
*
* There are two versions because compilers object if you take the address of
* an array.
*/
/*
 * SSZ(name, field): byte offset of field within type name, as an int.
 * SSZA(name, field): the same for an array member (offset of field[0]).
 *
 * Both use offsetof() instead of taking a member address through a null
 * pointer and casting that pointer to int, which is not portable where
 * pointers are wider than int. The result is still an int.
 */
#undef SSZ
#define SSZ(name, field) ((int)offsetof(name, field))
#undef SSZA
#define SSZA(name, field) ((int)offsetof(name, field[0]))
/*
 * Free and free-string macros that overwrite memory during debugging.
 *
 * FREE(p, len) frees p; with DEBUG defined it first fills len bytes at p
 * with 0xff. FREES(p) frees the string p; with DEBUG defined it first
 * fills strlen(p) bytes with 0xff.
 *
 * The macros expand to a single do { } while (0) statement, so they need a
 * trailing semicolon and are safe as the body of an unbraced if/else.
 * With DEBUG defined p is evaluated more than once; without DEBUG, len is
 * not evaluated at all. Arguments must therefore be free of side effects.
 */
#undef FREE
#undef FREES
#ifdef DEBUG
#define FREE(p, len) do { \
	memset((p), 0xff, (len)); \
	free(p); \
} while (0)
#define FREES(p) do { \
	FREE((p), strlen(p)); \
} while (0)
#else
#define FREE(p, len) do { \
	free(p); \
} while (0)
#define FREES(p) do { \
	free(p); \
} while (0)
#endif
/* Pairs a flag bit mask with its printable name, for displaying flag values. */
struct __fn {
	u_int32_t mask;			/* Flag value. */
	char *name;			/* Flag name. */
};
typedef struct __fn FN;
/*
 * Set, clear and test flags.
 *
 * The F_ forms operate on the "flags" member of the structure p points to;
 * the LF_ forms operate on a variable named "flags" in the enclosing scope.
 * Every expansion is fully parenthesized, so each macro is one expression
 * and can be combined with other operators. F_ISSET and LF_ISSET yield the
 * masked bits, not a normalized 0/1.
 */
#define F_SET(p, f) ((p)->flags |= (f))
#define F_CLR(p, f) ((p)->flags &= ~(f))
#define F_ISSET(p, f) ((p)->flags & (f))
#define LF_SET(f) (flags |= (f))
#define LF_CLR(f) (flags &= ~(f))
#define LF_ISSET(f) (flags & (f))
/* Display separator string; any earlier definition of DB_LINE is discarded. */
#undef DB_LINE
#define DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
/*******************************************************
* Files.
*******************************************************/
/*
 * Maximum path length: keep the system's MAXPATHLEN when there is one,
 * otherwise use PATH_MAX, otherwise fall back to 1024.
 */
#if !defined(MAXPATHLEN) && defined(PATH_MAX)
#define MAXPATHLEN PATH_MAX
#endif
#if !defined(MAXPATHLEN)
#define MAXPATHLEN 1024
#endif
/* Both PATH_ macros are string literals, not characters. */
#define PATH_DOT "." /* Current working directory. */
#define PATH_SEPARATOR "/" /* Path separator character. */
/* Fallback permission bits, defined only when S_IRUSR is not already defined. */
#ifndef S_IRUSR /* UNIX specific file permissions. */
#define S_IRUSR 0000400 /* R for owner */
#define S_IWUSR 0000200 /* W for owner */
#define S_IRGRP 0000040 /* R for group */
#define S_IWGRP 0000020 /* W for group */
#define S_IROTH 0000004 /* R for other */
#define S_IWOTH 0000002 /* W for other */
#endif
/*
 * Fallback directory test, defined only when S_ISDIR is not already
 * defined: true when the file-type bits of mode m (mask 0170000) equal
 * the directory type 0040000. The argument is parenthesized so that an
 * expression such as "a | b" is masked as a whole.
 */
#ifndef S_ISDIR /* UNIX specific: directory test. */
#define S_ISDIR(m) (((m) & 0170000) == 0040000)
#endif
/*******************************************************
* Mutex support.
*******************************************************/
@spin_line1@
@spin_line2@
@spin_line3@
/*
* !!!
* Various systems require different alignments for mutexes (the worst we've
* seen so far is 16-bytes on some HP architectures). The mutex (tsl_t) must
* be first in the db_mutex_t structure, which must itself be first in the
* region. This ensures the alignment is as returned by mmap(2), which should
* be sufficient. All other mutex users must ensure proper alignment locally.
*/
#define MUTEX_ALIGNMENT @mutex_align@
/*
 * The offset of a mutex in memory: the distance in bytes from address a
 * to address b, as an off_t. Both arguments are parenthesized so that the
 * byte-pointer cast applies to the whole argument expression (for example
 * "base + 1") rather than to its first operand only.
 */
#define MUTEX_LOCK_OFFSET(a, b) \
	((off_t)((u_int8_t *)(b) - (u_int8_t *)(a)))
/*
 * Mutex structure. The members depend on the build:
 *  - with HAVE_SPINLOCKS: a tsl_t, plus the holder's pid only under DEBUG;
 *  - without HAVE_SPINLOCKS: a backing file offset and the holder's pid;
 *  - with MUTEX_STATISTICS: two additional wait/no-wait counters.
 * Member order matters: tsl_resource must remain the first member.
 */
typedef struct _db_mutex_t {
#ifdef HAVE_SPINLOCKS
tsl_t tsl_resource; /* Resource test and set. */
#ifdef DEBUG
u_long pid; /* Lock holder: 0 or process pid. */
#endif
#else
off_t off; /* Backing file offset. */
u_long pid; /* Lock holder: 0 or process pid. */
#endif
#ifdef MUTEX_STATISTICS
u_long mutex_set_wait; /* Blocking mutex: required waiting. */
u_long mutex_set_nowait; /* Blocking mutex: without waiting. */
#endif
} db_mutex_t;
#include "mutex_ext.h"
/*******************************************************
* Access methods.
*******************************************************/
/*
 * Lock/unlock a DB thread.
 *
 * Both macros are expressions. When DB_AM_THREAD is not set in the handle's
 * flags they evaluate to 0 without touching the mutex; otherwise they
 * evaluate to the result of __db_mutex_lock()/__db_mutex_unlock() on
 * dbp->mutex, called with -1 as the second argument. DB_THREAD_LOCK also
 * passes the environment's db_yield function, or NULL when the handle has
 * no environment. dbp is evaluated more than once, so it must be free of
 * side effects.
 */
#define DB_THREAD_LOCK(dbp) \
(F_ISSET(dbp, DB_AM_THREAD) ? \
__db_mutex_lock((db_mutex_t *)(dbp)->mutex, -1, \
(dbp)->dbenv == NULL ? NULL : (dbp)->dbenv->db_yield) : 0)
#define DB_THREAD_UNLOCK(dbp) \
(F_ISSET(dbp, DB_AM_THREAD) ? \
__db_mutex_unlock((db_mutex_t *)(dbp)->mutex, -1) : 0)
/* Btree/recno local statistics: ten 32-bit counters. */
typedef struct __db_bt_lstat {
	u_int32_t bt_freed;		/* Pages freed for reuse. */
	u_int32_t bt_pfxsaved;		/* Bytes saved by prefix compression. */
	u_int32_t bt_split;		/* Total number of splits. */
	u_int32_t bt_rootsplit;		/* Root page splits. */
	u_int32_t bt_fastsplit;		/* Fast splits. */
	u_int32_t bt_added;		/* Items added. */
	u_int32_t bt_deleted;		/* Items deleted. */
	u_int32_t bt_get;		/* Items retrieved. */
	u_int32_t bt_cache_hit;		/* Hits in fast-insert code. */
	u_int32_t bt_cache_miss;	/* Misses in fast-insert code. */
} DB_BTREE_LSTAT;
/*******************************************************
* Environment.
*******************************************************/
/* File category passed to __db_appname(); the values run from 0 to 3. */
enum __db_appname_kind {
	DB_APP_NONE = 0,		/* No type (region). */
	DB_APP_DATA = 1,		/* Data file. */
	DB_APP_LOG = 2,			/* Log file. */
	DB_APP_TMP = 3			/* Temporary file. */
};
typedef enum __db_appname_kind APPNAME;
/*******************************************************
* Regions.
*******************************************************/
/*
 * Common header at the start of every shared memory region. Sharing one
 * layout lets the general region code handle the race between a region
 * being deleted and other processes waiting on the region mutex.
 *
 * !!!
 * The mutex has to stay the first member of the region.
 */
#define DB_R_DELETED 0x01		/* Region was deleted. */

struct _rlayout {
	db_mutex_t lock;		/* Region mutex. */
	u_int32_t refcnt;		/* Region reference count. */
	size_t size;			/* Region length. */
	int majver;			/* Major version number. */
	int minver;			/* Minor version number. */
	int patch;			/* Patch version number. */
	u_int32_t flags;		/* DB_R_DELETED. */
};
typedef struct _rlayout RLAYOUT;
/*******************************************************
* Mpool.
*******************************************************/
/*
 * File types for DB access methods.  Negative numbers are reserved to DB.
 * The values are parenthesized so that they behave as single operands
 * wherever the macros are expanded.
 */
#define	DB_FTYPE_BTREE		(-1)	/* Btree. */
#define	DB_FTYPE_HASH		(-2)	/* Hash. */
/*
 * Structure used as the DB pgin/pgout pgcookie: the two facts a page
 * conversion routine needs about the file it is operating on.
 */
typedef struct __dbpginfo {
size_t db_pagesize; /* Underlying page size. */
int needswap; /* If swapping required. */
} DB_PGINFO;
/*******************************************************
* Log.
*******************************************************/
/*
 * ZERO_LSN --
 *	Reset both members of an LSN to 0.  Expands to a braced statement,
 *	and LSN is evaluated twice.
 */
#define	ZERO_LSN(LSN) {							\
	(LSN).file = 0;							\
	(LSN).offset = 0;						\
}

/*
 * IS_ZERO_LSN --
 *	Evaluate to 1 if LSN is a 'zero' lsn, otherwise 0.  Only the file
 *	member is examined; the offset is not checked.
 */
#define	IS_ZERO_LSN(LSN)	((LSN).file == 0)

/*
 * DB_LOGGING --
 *	Test if we need to log a change: true when the handle has
 *	DB_AM_LOGGING set and does not have DB_AM_RECOVER set.
 */
#define	DB_LOGGING(dbp)							\
	(F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER))
/*
 * Debugging macros to log operations.
 *
 * LOG_OP exists only in DEBUG builds.  DEBUG_LREAD logs operations that
 * read the database and is live only when both DEBUG and DEBUG_ROP are
 * defined; DEBUG_LWRITE logs operations that modify the database and is
 * live only when both DEBUG and DEBUG_WOP are defined.  In every other
 * configuration the two macros expand to nothing.
 *
 *	D	dbp
 *	T	txn
 *	O	operation (string)
 *	K	key
 *	A	data
 *	F	flags
 */
#ifdef DEBUG
#define	LOG_OP(D, T, O, K, A, F) {					\
	DB_LSN _lsn;							\
	DBT _op;							\
	if (DB_LOGGING((D))) {						\
		memset(&_op, 0, sizeof(_op));				\
		_op.data = O;						\
		_op.size = strlen(O) + 1;				\
		(void)__db_debug_log((D)->dbenv->lg_info,		\
		    T, &_lsn, 0, &_op, (D)->log_fileid, K, A, F);	\
	}								\
}
#endif /* DEBUG */

#if defined(DEBUG) && defined(DEBUG_ROP)
#define	DEBUG_LREAD(D, T, O, K, A, F)	LOG_OP(D, T, O, K, A, F)
#else
#define	DEBUG_LREAD(D, T, O, K, A, F)
#endif

#if defined(DEBUG) && defined(DEBUG_WOP)
#define	DEBUG_LWRITE(D, T, O, K, A, F)	LOG_OP(D, T, O, K, A, F)
#else
#define	DEBUG_LWRITE(D, T, O, K, A, F)
#endif
/*******************************************************
* Transactions and recovery.
*******************************************************/
/*
 * Locker id space.
 *
 * One id space is shared by the lock manager and the transaction manager:
 * lock ids occupy 0 through MAX_LOCKER_ID, transaction ids occupy
 * MAX_LOCKER_ID + 1 through MAX_TXNID.
 */
#define	MAX_LOCKER_ID	0x0fffffff
#define	MAX_TXNID	0xffffffff

/*
 * Out of band value for a lock.  Locks are handed to callers as offsets
 * into the lock region, and every region starts with an RLAYOUT structure,
 * so offset 0 can never refer to a real lock.
 */
#define	LOCK_INVALID	0
/*
 * The structure allocated for every transaction.
 *
 * Per the locker id comment in this header, txnid values are drawn from
 * the range above MAX_LOCKER_ID.
 */
struct __db_txn {
DB_TXNMGR *mgrp; /* Pointer to transaction manager. */
DB_TXN *parent; /* Pointer to transaction's parent. */
DB_LSN last_lsn; /* Lsn of last log write. */
u_int32_t txnid; /* Unique transaction id. */
size_t off; /* Detail structure within region. */
TAILQ_ENTRY(__db_txn) links; /* Linkage onto a tail queue of transactions. */
};
#endif /* !_DB_INTERNAL_H_ */

535
db2/include/db_page.h Normal file
View File

@ -0,0 +1,535 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)db_page.h 10.10 (Sleepycat) 8/18/97
*/
#ifndef _DB_PAGE_H_
#define _DB_PAGE_H_
/*
* DB page formats.
*
* This implementation requires that values within the following structures
* NOT be padded -- note, ANSI C permits random padding within structures.
* If your compiler pads randomly you can just forget ever making DB run on
* your system. In addition, no data type can require larger alignment than
* its own size, e.g., a 4-byte data element may not require 8-byte alignment.
*
* Note that key/data lengths are often stored in db_indx_t's -- this is
* not accidental, nor does it limit the key/data size. If the key/data
* item fits on a page, it's guaranteed to be small enough to fit into a
* db_indx_t, and storing it in one saves space.
*/
/*
 * Well-known page numbers.  Page 0 holds the metadata, so it can never be
 * a data page; that is why 0 also serves as the "no page" value.
 */
#define	PGNO_METADATA	0	/* Metadata page number. */
#define	PGNO_INVALID	0	/* Metadata page number, therefore illegal. */
#define	PGNO_ROOT	1	/* Root is page #1. */
/************************************************************************
BTREE METADATA PAGE LAYOUT
************************************************************************/
/*
 * Btree metadata page layout:
 *
 * +-----------------------------------+
 * | lsn | pgno | magic |
 * +-----------------------------------+
 * | version | pagesize | free |
 * +-----------------------------------+
 * | flags | unused ... |
 * +-----------------------------------+
 *
 * The diagram is a sketch only; the authoritative field order is the
 * structure below.  The byte ranges in the field comments assume no
 * padding, an 8-byte DB_LSN and a DB_FILE_ID_LEN of 20 (which is what the
 * 48-67 range for uid implies).
 */
typedef struct _btmeta {
DB_LSN lsn; /* 00-07: LSN. */
db_pgno_t pgno; /* 08-11: Current page number. */
u_int32_t magic; /* 12-15: Magic number. */
u_int32_t version; /* 16-19: Version. */
u_int32_t pagesize; /* 20-23: Pagesize. */
u_int32_t maxkey; /* 24-27: Btree: Maxkey. */
u_int32_t minkey; /* 28-31: Btree: Minkey. */
u_int32_t free; /* 32-35: Free list page number. */
#define BTM_DUP 0x001 /* Duplicates. */
#define BTM_RECNO 0x002 /* Recno tree. */
#define BTM_RECNUM 0x004 /* Btree: maintain record count. */
#define BTM_FIXEDLEN 0x008 /* Recno: fixed length records. */
#define BTM_RENUMBER 0x010 /* Recno: renumber on insert/delete. */
#define BTM_MASK 0x01f /* All of the BTM_* bits above. */
u_int32_t flags; /* 36-39: Flags. */
u_int32_t re_len; /* 40-43: Recno: fixed-length record length. */
u_int32_t re_pad; /* 44-47: Recno: fixed-length record pad. */
/* 48-67: Unique file ID. */
u_int8_t uid[DB_FILE_ID_LEN];
u_int32_t spare[13]; /* 68-119: Save some room for growth (13 * 4 bytes). */
DB_BTREE_LSTAT stat; /* 120-159: Statistics (10 * 4 bytes). */
} BTMETA;
/************************************************************************
HASH METADATA PAGE LAYOUT
************************************************************************/
/*
 * Hash metadata page layout:
 *
 * +-----------------------------------+
 * | lsn | magic | version |
 * +-----------------------------------+
 * | pagesize | ovfl_point| last_freed|
 * +-----------------------------------+
 * | max_bucket| high_mask | low_mask |
 * +-----------------------------------+
 * | ffactor | nelem | charkey |
 * +-----------------------------------+
 * | spares[32]| flags | unused |
 * +-----------------------------------+
 *
 * The diagram is a sketch only: it omits pgno and uid, and it shows flags
 * after spares whereas the structure stores flags first.  The structure
 * below is authoritative.
 */
/* Hash Table Information */
typedef struct hashhdr { /* Disk resident portion */
DB_LSN lsn; /* 00-07: LSN of the header page */
db_pgno_t pgno; /* 08-11: Page number (btree compatibility). */
u_int32_t magic; /* 12-15: Magic NO for hash tables */
u_int32_t version; /* 16-19: Version ID */
u_int32_t pagesize; /* 20-23: Bucket/Page Size */
u_int32_t ovfl_point; /* 24-27: Overflow page allocation location */
u_int32_t last_freed; /* 28-31: Last freed overflow page pgno */
u_int32_t max_bucket; /* 32-35: ID of Maximum bucket in use */
u_int32_t high_mask; /* 36-39: Modulo mask into table */
u_int32_t low_mask; /* 40-43: Modulo mask into table lower half */
u_int32_t ffactor; /* 44-47: Fill factor */
u_int32_t nelem; /* 48-51: Number of keys in hash table */
u_int32_t h_charkey; /* 52-55: Value of hash(CHARKEY) */
#define DB_HASH_DUP 0x01 /* Flag bit: allow duplicates. */
u_int32_t flags; /* 56-59: Flags (DB_HASH_DUP). */
#define NCACHED 32 /* number of spare points */
/* 60-187: Spare pages for overflow */
u_int32_t spares[NCACHED];
/* 188-207: Unique file ID. */
u_int8_t uid[DB_FILE_ID_LEN];
/*
 * Minimum page size is 256.
 */
} HASHHDR;
/************************************************************************
MAIN PAGE LAYOUT
************************************************************************/
/*
 * +-----------------------------------+
 * | lsn | pgno | prev pgno |
 * +-----------------------------------+
 * | next pgno | entries | hf offset |
 * +-----------------------------------+
 * | level | type | index |
 * +-----------------------------------+
 * | index | free --> |
 * +-----------+-----------------------+
 * | F R E E A R E A |
 * +-----------------------------------+
 * | <-- free | item |
 * +-----------------------------------+
 * | item | item | item |
 * +-----------------------------------+
 *
 * The fixed page header (everything before inp[], byte offsets 00-25 in
 * the field comments below) is 26 bytes, and the following indices are
 * guaranteed to be two-byte aligned.
 *
 * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the
 * key for inp[1]'s data. All other types of pages only contain single items.
 */
typedef struct _db_page {
DB_LSN lsn; /* 00-07: Log sequence number. */
db_pgno_t pgno; /* 08-11: Current page number. */
db_pgno_t prev_pgno; /* 12-15: Previous page number. */
db_pgno_t next_pgno; /* 16-19: Next page number. */
db_indx_t entries; /* 20-21: Number of item pairs on the page. */
db_indx_t hf_offset; /* 22-23: High free byte page offset. */
/*
 * The btree levels are numbered from the leaf to the root, starting
 * with 1, so the leaf is level 1, its parent is level 2, and so on.
 * We maintain this level on all btree pages, but the only place that
 * we actually need it is on the root page. It would not be difficult
 * to hide the byte on the root page once it becomes an internal page,
 * so we could get this byte back if we needed it for something else.
 */
#define LEAFLEVEL 1
#define MAXBTREELEVEL 255
u_int8_t level; /* 24: Btree tree level. */
#define P_INVALID 0 /* Invalid page type. */
#define P_DUPLICATE 1 /* Duplicate. */
#define P_HASH 2 /* Hash. */
#define P_IBTREE 3 /* Btree internal. */
#define P_IRECNO 4 /* Recno internal. */
#define P_LBTREE 5 /* Btree leaf. */
#define P_LRECNO 6 /* Recno leaf. */
#define P_OVERFLOW 7 /* Overflow. */
u_int8_t type; /* 25: Page type (one of the P_* values above). */
db_indx_t inp[1]; /* Variable length index of items. */
} PAGE;
/*
 * Element macros.
 *
 * Each macro treats p as a pointer to the start of a page and yields the
 * named PAGE header field as an lvalue.  The argument is parenthesized
 * before the cast so that expressions such as `base + offset' are cast as
 * a whole; without the parentheses the cast would bind to `base' alone and
 * the addition would be scaled by sizeof(PAGE).
 */
#define	LSN(p)		(((PAGE *)(p))->lsn)
#define	PGNO(p)		(((PAGE *)(p))->pgno)
#define	PREV_PGNO(p)	(((PAGE *)(p))->prev_pgno)
#define	NEXT_PGNO(p)	(((PAGE *)(p))->next_pgno)
#define	NUM_ENT(p)	(((PAGE *)(p))->entries)
#define	HOFFSET(p)	(((PAGE *)(p))->hf_offset)
#define	LEVEL(p)	(((PAGE *)(p))->level)
#define	TYPE(p)		(((PAGE *)(p))->type)
/*
 * !!!
 * The next_pgno and prev_pgno fields are not maintained for btree and recno
 * internal pages.  It's a minor performance improvement, it's hard to do
 * when deleting internal pages, and it decreases the chance of deadlock
 * during deletes and splits.
 *
 * !!!
 * The btree/recno access method needs db_recno_t bytes of space on the root
 * page to record how many records the tree holds.  (The alternatives are to
 * keep the count in the meta-data page, which creates a second hot spot in
 * actively modified trees, or to recompute it from the BINTERNAL fields on
 * each access.)  The otherwise unused prev_pgno field is overloaded for it.
 *
 * RE_NREC yields the record count for a page: half the entry count on a
 * btree leaf (entries are key/data pairs), the entry count on a recno leaf,
 * and the overloaded prev_pgno value on any other page type.
 *
 * RE_NREC_ADJ and RE_NREC_SET update the overloaded field.  Note that both
 * expansions already end in a semicolon.
 */
#define	RE_NREC(p)							\
	(TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 :				\
	    TYPE(p) == P_LRECNO ? NUM_ENT(p) : PREV_PGNO(p))
#define	RE_NREC_ADJ(p, adj)						\
	PREV_PGNO(p) += (adj);
#define	RE_NREC_SET(p, num)						\
	PREV_PGNO(p) = (num);
/*
 * P_INIT --
 *	Initialize a page header: page number, previous/next links, btree
 *	level and page type are set from the arguments, the entry count is
 *	reset to 0 and the high free offset is set to the page size (i.e.
 *	the whole page is free).
 *
 * !!!
 * The page's LSN is deliberately left alone; code depends on it being
 * unchanged after a P_INIT call.
 */
#define	P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do {	\
	PGNO(pg) = (n);							\
	PREV_PGNO(pg) = (pg_prev);					\
	NEXT_PGNO(pg) = (pg_next);					\
	NUM_ENT(pg) = 0;						\
	HOFFSET(pg) = (pg_size);					\
	LEVEL(pg) = (btl);						\
	TYPE(pg) = (pg_type);						\
} while (0)
/* Page header length (offset to first index). */
#define	P_OVERHEAD		(SSZA(PAGE, inp))

/* First free byte: the header plus one db_indx_t per entry. */
#define	LOFFSET(pg)		(P_OVERHEAD + NUM_ENT(pg) * sizeof(db_indx_t))

/* Free space on the page: the gap between the index array and the items. */
#define	P_FREESPACE(pg)		(HOFFSET(pg) - LOFFSET(pg))

/*
 * Get a pointer to the bytes at a specific index: the page address plus
 * the byte offset stored in inp[indx].
 *
 * pg is parenthesized before each cast so that an argument such as
 * `base + offset' is cast as a whole rather than having the cast bind to
 * `base' alone.
 */
#define	P_ENTRY(pg, indx)						\
	((u_int8_t *)(pg) + ((PAGE *)(pg))->inp[indx])
/************************************************************************
OVERFLOW PAGE LAYOUT
************************************************************************/
/*
 * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which
 * store a page number (the first page of the overflow item) and a length
 * (the total length of the overflow item).  The overflow item consists of
 * some number of overflow pages, linked by the next_pgno field of the page.
 * A next_pgno field of PGNO_INVALID flags the end of the overflow item.
 *
 * Overflow page overloads:
 *	The amount of overflow data stored on each page is stored in the
 *	hf_offset field (OV_LEN).
 *
 *	The implementation reference counts overflow items as it's possible
 *	for them to be promoted onto btree internal pages.  The reference
 *	count is stored in the entries field (OV_REF).
 *
 * The argument is parenthesized before the cast so that an expression such
 * as `base + offset' is cast as a whole.
 */
#define	OV_LEN(p)	(((PAGE *)(p))->hf_offset)
#define	OV_REF(p)	(((PAGE *)(p))->entries)

/* Maximum number of bytes that you can put on an overflow page. */
#define	P_MAXSPACE(psize)	((psize) - P_OVERHEAD)
/************************************************************************
HASH PAGE LAYOUT
************************************************************************/
/*
 * Hash page item types.  Every index on a hash page refers to a group of
 * bytes whose first byte is one of these values.
 */
#define	H_KEYDATA	1	/* Key/data item. */
#define	H_DUPLICATE	2	/* Duplicate key/data item. */
#define	H_OFFPAGE	3	/* Overflow key/data item. */
#define	H_OFFDUP	4	/* Overflow page of duplicates. */
/*
 * The first and second types are H_KEYDATA and H_DUPLICATE, represented
 * by the HKEYDATA structure:
 *
 * +-----------------------------------+
 * | type | key/data ... |
 * +-----------------------------------+
 *
 * For duplicates, the data field encodes duplicate elements in the data
 * field:
 *
 * +---------------------------------------------------------------+
 * | type | len1 | element1 | len1 | len2 | element2 | len2 |
 * +---------------------------------------------------------------+
 *
 * Each element is bracketed by its own length on both sides.  Thus, by
 * keeping track of the offset in the element, we can do both backward and
 * forward traversal.
 */
typedef struct _hkeydata {
u_int8_t type; /* 00: Page type. */
u_int8_t data[1]; /* Variable length key/data item. */
} HKEYDATA;
/* Get a HKEYDATA item for a specific index. */
#define	GET_HKEYDATA(pg, indx)						\
	((HKEYDATA *)P_ENTRY(pg, indx))

/*
 * The length of any hash item (LEN_HITEM) and of the key/data bytes of an
 * HKEYDATA item (LEN_HKEYDATA, which also subtracts the item's header).
 * Note that indx is an element index, not a PAIR index.
 *
 * An item's length is the distance from its own offset up to the offset
 * of the previous index entry, or up to the end of the page for index 0.
 *
 * pg must be a PAGE pointer expression (it is not cast).  All arguments
 * are parenthesized so that expressions such as `pagep + 1' or `a ? b : c'
 * expand correctly.
 */
#define	LEN_HITEM(pg, pgsize, indx)					\
	(((indx) == 0 ? (pgsize) : (pg)->inp[(indx) - 1]) -		\
	    (pg)->inp[(indx)])
#define	LEN_HKEYDATA(pg, psize, indx)					\
	(((indx) == 0 ? (psize) : (pg)->inp[(indx) - 1]) -		\
	    (pg)->inp[(indx)] - HKEYDATA_SIZE(0))
/*
 * Page space required to add a new HKEYDATA item to the page.
 *
 * HKEYDATA_SIZE is the item alone: len data bytes plus the fixed part of
 * the structure that precedes them.  HKEYDATA_PSIZE additionally counts
 * the db_indx_t slot the item consumes in the page index.
 */
#define	HKEYDATA_SIZE(len)	((len) + SSZA(HKEYDATA, data))
#define	HKEYDATA_PSIZE(len)	(HKEYDATA_SIZE(len) + sizeof(db_indx_t))
/*
 * PUT_HKEYDATA --
 *	Put a HKEYDATA item at the location referenced by a page entry:
 *	store the item type in the first byte and copy len bytes of kd
 *	immediately after it.
 *
 *	pe		destination address of the item on the page
 *	kd		source key/data bytes
 *	len		number of bytes to copy from kd
 *	item_type	value stored in the item's type byte
 *
 * The last parameter must not be spelled `type': the preprocessor would
 * then also substitute it into the `->type' member name, and the macro
 * would compile only when the caller's argument was literally `type'.
 * pe is evaluated twice.
 */
#define	PUT_HKEYDATA(pe, kd, len, item_type) {				\
	((HKEYDATA *)(pe))->type = (item_type);				\
	memcpy((u_int8_t *)(pe) + sizeof(u_int8_t), (kd), (len));	\
}
/*
 * Macros that describe the page layout in terms of key-data pairs.
 * A parameter named "pindx" is an index expressed in pairs rather than in
 * individual elements: pair N occupies element 2N (the key) and element
 * 2N + 1 (the data).
 */
#define	H_NUMPAIRS(pg)			(NUM_ENT(pg) / 2)
#define	H_KEYINDEX(pindx)		(2 * (pindx))
#define	H_DATAINDEX(pindx)		(H_KEYINDEX(pindx) + 1)

/* The key and data items of a pair. */
#define	H_PAIRKEY(pg, pindx)		GET_HKEYDATA(pg, H_KEYINDEX(pindx))
#define	H_PAIRDATA(pg, pindx)		GET_HKEYDATA(pg, H_DATAINDEX(pindx))

/* Total page bytes used by both items of a pair. */
#define	H_PAIRSIZE(pg, psize, pindx)					\
	(LEN_HITEM(pg, psize, H_KEYINDEX(pindx)) +			\
	    LEN_HITEM(pg, psize, H_DATAINDEX(pindx)))

/* Length of the data or key bytes of a pair. */
#define	LEN_HDATA(p, psize, pindx)					\
	LEN_HKEYDATA(p, psize, H_DATAINDEX(pindx))
#define	LEN_HKEY(p, psize, pindx)					\
	LEN_HKEYDATA(p, psize, H_KEYINDEX(pindx))
/*
 * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure:
 *
 * +-----------------------------------+
 * | type | pgno_t | total len |
 * +-----------------------------------+
 *
 * The diagram omits the three padding bytes that follow the type byte.
 */
typedef struct _hoffpage {
u_int8_t type; /* 00: Page type and delete flag. */
u_int8_t unused[3]; /* 01-03: Padding, unused. */
db_pgno_t pgno; /* 04-07: Offpage page number. */
u_int32_t tlen; /* 08-11: Total length of item. */
} HOFFPAGE;
/* Get a HOFFPAGE item for a specific index. */
#define GET_HOFFPAGE(pg, indx) \
((HOFFPAGE *)P_ENTRY(pg, indx))
/*
 * Page space required to add a new HOFFPAGE item to the page:
 * HOFFPAGE_SIZE is the item alone, HOFFPAGE_PSIZE also counts the
 * db_indx_t slot it takes in the page index.
 */
#define HOFFPAGE_SIZE (sizeof(HOFFPAGE))
#define HOFFPAGE_PSIZE (HOFFPAGE_SIZE + sizeof(db_indx_t))
/*
 * The fourth type is H_OFFDUP represented by the HOFFDUP structure:
 *
 * +-----------------------+
 * | type | pgno_t |
 * +-----------------------+
 *
 * The diagram omits the three padding bytes that follow the type byte.
 */
typedef struct _hoffdup {
u_int8_t type; /* 00: Page type and delete flag. */
u_int8_t unused[3]; /* 01-03: Padding, unused. */
db_pgno_t pgno; /* 04-07: Offpage page number. */
} HOFFDUP;
/* Get a HOFFDUP item for a specific index. */
#define GET_HOFFDUP(pg, indx) \
((HOFFDUP *)P_ENTRY(pg, indx))
/*
 * Page space required to add a new HOFFDUP item to the page:
 * HOFFDUP_SIZE is the item alone, HOFFDUP_PSIZE also counts the
 * db_indx_t slot it takes in the page index.
 */
#define HOFFDUP_SIZE (sizeof(HOFFDUP))
#define HOFFDUP_PSIZE (HOFFDUP_SIZE + sizeof(db_indx_t))
/************************************************************************
BTREE PAGE LAYOUT
************************************************************************/
/*
 * Btree page item types.  Every index on a btree page refers to a group
 * of bytes whose `type' field holds one of these values.
 */
#define	B_KEYDATA	1	/* Key/data item. */
#define	B_DUPLICATE	2	/* Duplicate key/data item. */
#define	B_OVERFLOW	3	/* Overflow key/data item. */
/*
 * The first type is B_KEYDATA, represented by the BKEYDATA structure:
 *
 * +-----------------------------------+
 * | length | type | key/data |
 * +-----------------------------------+
 *
 * NOTE(review): the "02" offset below assumes the compiler packs the two
 * u_int bit-fields (1 + 7 bits) into the single byte that follows len, so
 * that data starts at offset 3 -- confirm for each target ABI.
 */
typedef struct _bkeydata {
db_indx_t len; /* 00-01: Key/data item length. */
u_int deleted :1; /* 02: Page type and delete flag. */
u_int type :7;
u_int8_t data[1]; /* Variable length key/data item. */
} BKEYDATA;
/* Get a BKEYDATA item for a specific index. */
#define GET_BKEYDATA(pg, indx) \
((BKEYDATA *)P_ENTRY(pg, indx))
/*
 * Page space required to add a new BKEYDATA item to the page:
 * BKEYDATA_SIZE is the item alone (header plus len data bytes, passed
 * through ALIGN with an argument of 4), BKEYDATA_PSIZE also counts the
 * db_indx_t slot it takes in the page index.
 *
 * NOTE(review): BKEYDATA_SIZE has no outer parentheses of its own and so
 * relies on ALIGN() expanding to a fully parenthesized expression --
 * confirm against the ALIGN definition.
 */
#define BKEYDATA_SIZE(len) \
ALIGN((len) + SSZA(BKEYDATA, data), 4)
#define BKEYDATA_PSIZE(len) \
(BKEYDATA_SIZE(len) + sizeof(db_indx_t))
/*
 * The second and third types are B_DUPLICATE and B_OVERFLOW, represented
 * by the BOVERFLOW structure.  Laid out in field order:
 *
 * +-------------------------------------------------------+
 * | unused1 | deleted/type | unused2 | pgno | total len |
 * +-------------------------------------------------------+
 *
 * The first four bytes mirror the start of BKEYDATA (a two-byte slot, then
 * the deleted/type byte), followed by one byte of padding.
 */
typedef struct _boverflow {
db_indx_t unused1; /* 00-01: Padding, unused. */
u_int deleted :1; /* 02: Page type and delete flag. */
u_int type :7;
u_int8_t unused2; /* 03: Padding, unused. */
db_pgno_t pgno; /* 04-07: Next page number. */
u_int32_t tlen; /* 08-11: Total length of item. */
} BOVERFLOW;
/* Get a BOVERFLOW item for a specific index. */
#define GET_BOVERFLOW(pg, indx) \
((BOVERFLOW *)P_ENTRY(pg, indx))
/*
 * Page space required to add a new BOVERFLOW item to the page:
 * BOVERFLOW_SIZE is the item alone (passed through ALIGN with an argument
 * of 4), BOVERFLOW_PSIZE also counts the db_indx_t slot it takes in the
 * page index.
 */
#define BOVERFLOW_SIZE \
ALIGN(sizeof(BOVERFLOW), 4)
#define BOVERFLOW_PSIZE \
(BOVERFLOW_SIZE + sizeof(db_indx_t))
/*
 * Btree leaf and hash pages group their indices in sets of two, one for
 * the key and one for the data; every other page type uses sets of one to
 * save space.  These names make the stride explicit at the point of use:
 * O_INDX is the step for a single index, P_INDX the step for a pair.
 */
#define	O_INDX	1
#define	P_INDX	2
/************************************************************************
BTREE INTERNAL PAGE LAYOUT
************************************************************************/
/*
 * Btree internal entry.  Laid out in field order:
 *
 * +----------------------------------------------------------+
 * | len | deleted/type | unused | pgno | nrecs | data ... |
 * +----------------------------------------------------------+
 */
typedef struct _binternal {
db_indx_t len; /* 00-01: Key/data item length. */
u_int deleted :1; /* 02: Page type and delete flag. */
u_int type :7;
u_int8_t unused; /* 03: Padding, unused. */
db_pgno_t pgno; /* 04-07: Page number of referenced page. */
db_recno_t nrecs; /* 08-11: Subtree record count. */
u_int8_t data[1]; /* Variable length key item. */
} BINTERNAL;
/* Get a BINTERNAL item for a specific index. */
#define GET_BINTERNAL(pg, indx) \
((BINTERNAL *)P_ENTRY(pg, indx))
/*
 * Page space required to add a new BINTERNAL item to the page:
 * BINTERNAL_SIZE is the item alone (header plus len key bytes, passed
 * through ALIGN with an argument of 4), BINTERNAL_PSIZE also counts the
 * db_indx_t slot it takes in the page index.
 */
#define BINTERNAL_SIZE(len) \
ALIGN((len) + SSZA(BINTERNAL, data), 4)
#define BINTERNAL_PSIZE(len) \
(BINTERNAL_SIZE(len) + sizeof(db_indx_t))
/************************************************************************
RECNO INTERNAL PAGE LAYOUT
************************************************************************/
/*
 * The recno internal entry.
 *
 * +-----------------------+
 * | leaf pgno | # of recs |
 * +-----------------------+
 *
 * XXX
 * Why not fold this into the db_indx_t structure, it's fixed length.
 */
typedef struct _rinternal {
db_pgno_t pgno; /* 00-03: Page number of referenced page. */
db_recno_t nrecs; /* 04-07: Subtree record count. */
} RINTERNAL;
/* Get a RINTERNAL item for a specific index. */
#define GET_RINTERNAL(pg, indx) \
((RINTERNAL *)P_ENTRY(pg, indx))
/*
 * Page space required to add a new RINTERNAL item to the page:
 * RINTERNAL_SIZE is the item alone (passed through ALIGN with an argument
 * of 4), RINTERNAL_PSIZE also counts the db_indx_t slot it takes in the
 * page index.
 */
#define RINTERNAL_SIZE \
ALIGN(sizeof(RINTERNAL), 4)
#define RINTERNAL_PSIZE \
(RINTERNAL_SIZE + sizeof(db_indx_t))
#endif /* _DB_PAGE_H_ */

106
db2/include/db_shash.h Normal file
View File

@ -0,0 +1,106 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)db_shash.h 10.1 (Sleepycat) 4/12/97
*/
/*
 * Hash Headers: DB_HASHTAB is the type of one hash bucket, i.e. the head
 * of a shared-memory tail queue.  A hash table is an array of these.
 */
typedef SH_TAILQ_HEAD(hash_head) DB_HASHTAB;
/*
 * __db_hashlookup --
 *
 * Look up something in a shared memory hash table.  The "elt" argument
 * should be a key, and cmp must know how to compare a key to whatever
 * structure it is that appears in the hash table.  The comparison function
 * is called as: cmp(elt, table_elt); a non-zero result means "match".
 *	begin: address of the beginning of the hash table (array of buckets).
 *	type:  the structure tag of the elements that are linked in each
 *	       bucket.
 *	field: the name of the field by which the "type" structures are
 *	       linked.
 *	elt:   the item for which we are searching in the hash table.
 *	r:     the variable into which we'll store the element if we find it.
 *	n:     the number of buckets in the hash table.
 *	hash:  the hash function that operates on elements of the type of elt.
 *	cmp:   compares elements of the type of elt with those in the table
 *	       (of type "type").
 *
 * If the element is not in the hash table, this macro exits with r set to
 * NULL.  elt is evaluated once for hashing and once per comparison.
 *
 * begin is parenthesized before it is indexed so that a base expression
 * such as `table + off' selects the intended bucket.
 */
#define	__db_hashlookup(begin, type, field, elt, r, n, hash, cmp) do {	\
	DB_HASHTAB *__bucket;						\
	u_int32_t __ndx;						\
									\
	__ndx = hash(elt) % (n);					\
	__bucket = &(begin)[__ndx];					\
	for ((r) = SH_TAILQ_FIRST(__bucket, type);			\
	    (r) != NULL; (r) = SH_TAILQ_NEXT(r, field, type))		\
		if (cmp(elt, r))					\
			break;						\
} while(0)
/*
 * __db_hashinsert --
 *
 * Insert a new entry at the head of its bucket in the hash table.  This
 * assumes that lookup has failed; don't call it if you haven't already
 * called __db_hashlookup.
 *	begin: the beginning address of the hash table (array of buckets).
 *	type:  the structure tag of the elements that are linked in each
 *	       bucket.
 *	field: the name of the field by which the "type" structures are
 *	       linked.
 *	elt:   the item to be inserted.
 *	n:     the number of buckets in the hash table.
 *	hash:  the hash function that operates on elements of the type of elt.
 *
 * begin is parenthesized before it is indexed so that a base expression
 * such as `table + off' selects the intended bucket.
 */
#define	__db_hashinsert(begin, type, field, elt, n, hash) do {		\
	u_int32_t __ndx;						\
	DB_HASHTAB *__bucket;						\
									\
	__ndx = hash(elt) % (n);					\
	__bucket = &(begin)[__ndx];					\
	SH_TAILQ_INSERT_HEAD(__bucket, elt, field, type);		\
} while(0)
/*
 * __db_hashremove --
 *	Remove the entry with a key == elt, if there is one.
 *	begin: address of the beginning of the hash table (array of buckets).
 *	type:  the structure tag of the elements that are linked in each
 *	       bucket.
 *	field: the name of the field by which the "type" structures are
 *	       linked.
 *	elt:   the key of the item to be deleted.
 *	n:     the number of buckets in the hash table.
 *	hash:  the hash function that operates on elements of the type of elt.
 *	cmp:   compares elements of the type of elt with those in the table
 *	       (of type "type").
 *
 * The lookup result is held in a pointer to the element structure
 * (struct type), which is what __db_hashlookup stores and what the queue
 * macros take; a pointer to the link field type is not assignment
 * compatible with it.  When no entry matches, the macro does nothing
 * instead of handing NULL to SH_TAILQ_REMOVE.
 */
#define	__db_hashremove(begin, type, field, elt, n, hash, cmp) {	\
	u_int32_t __ndx;						\
	DB_HASHTAB *__bucket;						\
	struct type *__entp;						\
									\
	__ndx = hash(elt) % (n);					\
	__bucket = &(begin)[__ndx];					\
	__db_hashlookup(begin, type, field, elt, __entp, n, hash, cmp);	\
	if (__entp != NULL) {						\
		SH_TAILQ_REMOVE(__bucket, __entp, field, type);		\
	}								\
}
/*
 * __db_hashremove_el --
 *	Given the object "obj" in the table, remove it.
 *	begin: address of the beginning of the hash table (array of buckets).
 *	type:  the structure tag of the elements that are linked in each
 *	       bucket.
 *	field: the name of the field by which the "type" structures are
 *	       linked.
 *	obj:   the object in the table that we wish to delete.
 *	n:     the number of buckets in the hash table.
 *	hash:  the hash function; here it is applied to obj itself.
 *
 * No lookup is performed, so obj must already be linked into the bucket
 * that hash(obj) selects.  begin is parenthesized before it is indexed so
 * that a base expression such as `table + off' selects the intended bucket.
 */
#define	__db_hashremove_el(begin, type, field, obj, n, hash) {		\
	u_int32_t __ndx;						\
	DB_HASHTAB *__bucket;						\
									\
	__ndx = hash(obj) % (n);					\
	__bucket = &(begin)[__ndx];					\
	SH_TAILQ_REMOVE(__bucket, obj, field, type);			\
}

105
db2/include/db_swap.h Normal file
View File

@ -0,0 +1,105 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)db_swap.h 10.3 (Sleepycat) 6/10/97
*/
#ifndef _DB_SWAP_H_
#define _DB_SWAP_H_
/*
 * Little endian <==> big endian 32-bit swap macros.
 *	M_32_SWAP	swap a memory location (argument is an lvalue)
 *	P_32_COPY	copy potentially unaligned 4 byte quantities
 *			(arguments are source and destination addresses)
 *	P_32_SWAP	swap a referenced memory location (argument is an
 *			address)
 *
 * All accesses are done one byte at a time, so the P_ forms place no
 * alignment requirement on their arguments.  Pointer arguments are
 * parenthesized before being cast so that expressions such as `p + 1' are
 * cast as a whole; otherwise the cast binds to `p' alone and the offset
 * becomes a byte offset.  Arguments are evaluated more than once.
 */
#define	M_32_SWAP(a) {							\
	u_int32_t _tmp;							\
	_tmp = (a);							\
	((u_int8_t *)&(a))[0] = ((u_int8_t *)&_tmp)[3];			\
	((u_int8_t *)&(a))[1] = ((u_int8_t *)&_tmp)[2];			\
	((u_int8_t *)&(a))[2] = ((u_int8_t *)&_tmp)[1];			\
	((u_int8_t *)&(a))[3] = ((u_int8_t *)&_tmp)[0];			\
}
#define	P_32_COPY(a, b) {						\
	((u_int8_t *)(b))[0] = ((u_int8_t *)(a))[0];			\
	((u_int8_t *)(b))[1] = ((u_int8_t *)(a))[1];			\
	((u_int8_t *)(b))[2] = ((u_int8_t *)(a))[2];			\
	((u_int8_t *)(b))[3] = ((u_int8_t *)(a))[3];			\
}
#define	P_32_SWAP(a) {							\
	u_int32_t _tmp;							\
	P_32_COPY(a, &_tmp);						\
	((u_int8_t *)(a))[0] = ((u_int8_t *)&_tmp)[3];			\
	((u_int8_t *)(a))[1] = ((u_int8_t *)&_tmp)[2];			\
	((u_int8_t *)(a))[2] = ((u_int8_t *)&_tmp)[1];			\
	((u_int8_t *)(a))[3] = ((u_int8_t *)&_tmp)[0];			\
}
/*
 * Little endian <==> big endian 16-bit swap macros.
 *	M_16_SWAP	swap a memory location (argument is an lvalue)
 *	P_16_COPY	copy potentially unaligned from one location to
 *			another (arguments are source and destination
 *			addresses)
 *	P_16_SWAP	swap a referenced memory location (argument is an
 *			address)
 *
 * All accesses are done one byte at a time, so the P_ forms place no
 * alignment requirement on their arguments.  Pointer arguments are
 * parenthesized before being cast so that expressions such as `p + 1' are
 * cast as a whole.  Arguments are evaluated more than once.
 */
#define	M_16_SWAP(a) {							\
	u_int16_t _tmp;							\
	_tmp = (u_int16_t)(a);						\
	((u_int8_t *)&(a))[0] = ((u_int8_t *)&_tmp)[1];			\
	((u_int8_t *)&(a))[1] = ((u_int8_t *)&_tmp)[0];			\
}
#define	P_16_COPY(a, b) {						\
	((u_int8_t *)(b))[0] = ((u_int8_t *)(a))[0];			\
	((u_int8_t *)(b))[1] = ((u_int8_t *)(a))[1];			\
}
#define	P_16_SWAP(a) {							\
	u_int16_t _tmp;							\
	P_16_COPY(a, &_tmp);						\
	((u_int8_t *)(a))[0] = ((u_int8_t *)&_tmp)[1];			\
	((u_int8_t *)(a))[1] = ((u_int8_t *)&_tmp)[0];			\
}
/*
 * SWAP32, SWAP16 --
 *	Byte-swap the 32-bit (16-bit) quantity that p points at, then step
 *	p past it.  The step is added as sizeof(u_int32_t) or
 *	sizeof(u_int16_t), so p must be a byte pointer for it to land on
 *	the next quantity.  p must be a modifiable lvalue.
 */
#define	SWAP32(p) {							\
	P_32_SWAP(p);							\
	(p) += sizeof(u_int32_t);					\
}
#define	SWAP16(p) {							\
	P_16_SWAP(p);							\
	(p) += sizeof(u_int16_t);					\
}
#endif /* !_DB_SWAP_H_ */

211
db2/include/hash.h Normal file
View File

@ -0,0 +1,211 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
* Margo Seltzer. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)hash.h 10.6 (Sleepycat) 8/18/97
*/
/*
 * Cursor structure definitions.
 *
 * The H_* values defined inside the structure are bit values for the
 * `flags' member.  Note that 0x0010 is not assigned.
 */
typedef struct cursor_t {
DBC *db_cursor; /* Associated DBC cursor handle. */
db_pgno_t bucket; /* Bucket we are traversing. */
DB_LOCK lock; /* Lock held on the current bucket. */
PAGE *pagep; /* The current page. */
db_pgno_t pgno; /* Current page number. */
db_indx_t bndx; /* Index within the current page. */
PAGE *dpagep; /* Duplicate page pointer. */
db_pgno_t dpgno; /* Duplicate page number. */
db_indx_t dndx; /* Index within a duplicate set. */
db_indx_t dup_off; /* Offset within a duplicate set. */
db_indx_t dup_len; /* Length of current duplicate. */
db_indx_t dup_tlen; /* Total length of duplicate entry. */
u_int32_t seek_size; /* Number of bytes we need for add. */
db_pgno_t seek_found_page;/* Page on which we can insert. */
u_int32_t big_keylen; /* Length of big_key buffer. */
void *big_key; /* Temporary buffer for big keys. */
u_int32_t big_datalen; /* Length of big_data buffer. */
void *big_data; /* Temporary buffer for big data. */
#define H_OK 0x0001
#define H_NOMORE 0x0002
#define H_DELETED 0x0004
#define H_ISDUP 0x0008
#define H_EXPAND 0x0020
u_int32_t flags; /* Is cursor inside a dup set. */
} HASH_CURSOR;
/* A cursor is valid when its bucket is anything other than BUCKET_INVALID. */
#define IS_VALID(C) ((C)->bucket != BUCKET_INVALID)
/*
 * HTAB --
 *	Per-handle, in-memory state of a hash table: the owning DB handle, the
 *	metadata page and its lock, the hash function in use, a buffer used
 *	during splits, and a set of statistics counters.
 */
typedef struct htab { /* Memory resident data structure. */
DB *dbp; /* Pointer to parent db structure. */
DB_LOCK hlock; /* Metadata page lock. */
HASHHDR *hdr; /* Pointer to meta-data page. */
u_int32_t (*hash) __P((const void *, u_int32_t)); /* Hash Function */
PAGE *split_buf; /* Temporary buffer for splits. */
int local_errno; /* Error Number -- for DBM compatibility */
u_long hash_accesses; /* Number of accesses to this table. */
u_long hash_collisions; /* Number of collisions on search. */
u_long hash_expansions; /* Number of times we added a bucket. */
u_long hash_overflows; /* Number of overflow pages. */
u_long hash_bigpages; /* Number of big key/data pages. */
} HTAB;
/*
 * Macro used for interface functions to set the txnid in the DBP.
 *
 * Stores T in the `txn' member of D.  The GET_META/DIRTY_META macros later
 * read (D)->txn to choose between the transaction's txnid and (D)->locker
 * as the locker id.
 */
#define SET_LOCKER(D, T) ((D)->txn = (T))
/*
 * More interface macros used to get/release the meta data page.
 *
 * GET_META(D, H):
 *	If D has DB_AM_LOCKING set and DB_AM_RECOVER clear, set
 *	(D)->lock.pgno to BUCKET_INVALID and acquire a DB_LOCK_READ lock on
 *	(D)->lock_dbt into (H)->hlock.  The locker id is (D)->txn->txnid when
 *	a transaction is set, otherwise (D)->locker.  Then fetch page 0 into
 *	(H)->hdr with __ham_get_page; if that fails, any lock recorded in
 *	(H)->hlock is released and cleared.
 *
 * NOTE: on failure this macro executes `return' in the *enclosing function*
 * (a negative lock_get result is mapped to EAGAIN, other errors are passed
 * through), so it can only be used where returning an int error code is
 * valid.  It expands to a brace block that declares a local `_r'; it is not
 * an expression.
 */
#define GET_META(D, H) { \
int _r; \
if (F_ISSET(D, DB_AM_LOCKING) && !F_ISSET(D, DB_AM_RECOVER)) { \
(D)->lock.pgno = BUCKET_INVALID; \
if ((_r = lock_get((D)->dbenv->lk_info, \
(D)->txn == NULL ? (D)->locker : (D)->txn->txnid, \
0, &(D)->lock_dbt, DB_LOCK_READ, \
&(H)->hlock)) != 0) \
return (_r < 0 ? EAGAIN : _r); \
} \
if ((_r = __ham_get_page(D, 0, (PAGE **)&((H)->hdr))) != 0) { \
if ((H)->hlock) { \
(void)lock_put((D)->dbenv->lk_info, (H)->hlock);\
(H)->hlock = 0; \
} \
return (_r); \
} \
}
/*
 * RELEASE_META(D, H):
 *	Counterpart of GET_META.  The metadata lock is released with lock_put
 *	only when DB_AM_RECOVER is clear, no transaction is set on D and a
 *	lock is recorded in (H)->hlock; (H)->hlock is then zeroed
 *	unconditionally (the `if' guards the lock_put statement only).  If a
 *	metadata page is held it is returned via __ham_put_page, passing 1 as
 *	the last argument when DB_HS_DIRTYMETA is set and 0 otherwise.
 *	Finally (H)->hdr is cleared and DB_HS_DIRTYMETA is cleared on D.
 *
 * Return values of lock_put and __ham_put_page are deliberately discarded.
 * The lock is released through (H)->dbp->dbenv while everything else uses
 * D; presumably D and (H)->dbp are the same handle -- confirm.
 */
#define RELEASE_META(D, H) { \
if (!F_ISSET(D, DB_AM_RECOVER) && \
(D)->txn == NULL && (H)->hlock) \
(void)lock_put((H)->dbp->dbenv->lk_info, (H)->hlock); \
(H)->hlock = 0; \
if ((H)->hdr) \
(void)__ham_put_page(D, (PAGE *)(H)->hdr, \
F_ISSET(D, DB_HS_DIRTYMETA) ? 1 : 0); \
(H)->hdr = NULL; \
F_CLR(D, DB_HS_DIRTYMETA); \
}
/*
 * DIRTY_META(H, R):
 *	Mark the metadata page dirty, first trading the metadata lock for a
 *	write lock when locking is active.
 *
 *	If (H)->dbp has DB_AM_LOCKING set and DB_AM_RECOVER clear, a
 *	DB_LOCK_WRITE lock on (H)->dbp->lock_dbt is requested (locker id is
 *	the transaction's txnid when one is set, otherwise (H)->dbp->locker).
 *	On success the previously held lock in (H)->hlock is released with
 *	lock_put, whose result is stored in R, and the new lock replaces it.
 *	On failure R receives the error (negative results are mapped to
 *	EAGAIN) and (H)->hlock is left untouched.
 *
 *	The new lock is installed only after a successful lock_get: storing
 *	the temporary unconditionally would overwrite (H)->hlock with an
 *	uninitialised value on failure and lose track of the lock that is
 *	still held.
 *
 *	DB_HS_DIRTYMETA is set on (H)->dbp in every case.  R is not assigned
 *	when locking is inactive, so callers must initialise it.
 *
 * Expands to a brace block, not an expression.
 */
#define DIRTY_META(H, R) { \
	if (F_ISSET((H)->dbp, DB_AM_LOCKING) && \
	    !F_ISSET((H)->dbp, DB_AM_RECOVER)) { \
		DB_LOCK _tmp; \
		(H)->dbp->lock.pgno = BUCKET_INVALID; \
		if (((R) = lock_get((H)->dbp->dbenv->lk_info, \
		    (H)->dbp->txn ? (H)->dbp->txn->txnid : \
		    (H)->dbp->locker, 0, &(H)->dbp->lock_dbt, \
		    DB_LOCK_WRITE, &_tmp)) == 0) { \
			(R) = lock_put((H)->dbp->dbenv->lk_info, \
			    (H)->hlock); \
			(H)->hlock = _tmp; \
		} else if ((R) < 0) \
			(R) = EAGAIN; \
	} \
	F_SET((H)->dbp, DB_HS_DIRTYMETA); \
}
/*
 * Allocate and discard thread structures.
 *
 * H_GETHANDLE(dbp, dbpp, ret):
 *	When dbp has DB_AM_THREAD set, call __db_gethandle(dbp, __ham_hdup,
 *	dbpp) and store its result in ret.  Otherwise set ret to 0 and store
 *	dbp itself through dbpp.
 *
 * H_PUTHANDLE(dbp):
 *	When dbp has DB_AM_THREAD set, hand it to __db_puthandle.
 *
 * The arguments are parenthesised so that expressions such as `&x' or
 * `p + 1' expand with the intended precedence in the assignment and
 * dereference positions.
 *
 * H_GETHANDLE still expands to a bare if/else statement: do not use it as
 * the unbraced body of another `if' that has its own `else'.
 */
#define H_GETHANDLE(dbp, dbpp, ret) \
	if (F_ISSET(dbp, DB_AM_THREAD)) \
		(ret) = __db_gethandle((dbp), __ham_hdup, (dbpp)); \
	else { \
		(ret) = 0; \
		*(dbpp) = (dbp); \
	}
#define H_PUTHANDLE(dbp) { \
	if (F_ISSET(dbp, DB_AM_THREAD)) \
		__db_puthandle(dbp); \
}
/*
 * Test string.
 * NOTE(review): presumably hashed to check that a stored table was built
 * with the same hash function -- confirm against the users of CHARKEY.
 */
#define CHARKEY "%$sniglet^&"
/* Overflow management */
/*
 * Overflow page numbers are allocated per split point. At each doubling of
 * the table, we can allocate extra pages. We keep track of how many pages
 * we've allocated at each point to calculate bucket to page number mapping.
 *
 * BUCKET_TO_PAGE(H, B):
 *	Page number of bucket B: B + 1, plus (for B != 0) the entry of
 *	(H)->hdr->spares[] indexed by __db_log2(B + 1) - 1.
 * PGNO_OF(H, S, O):
 *	BUCKET_TO_PAGE of bucket (1 << S) - 1, plus O.
 *
 * NOTE(review): `1 << (S)' is a signed int shift, so S >= 31 would be
 * undefined behaviour -- confirm S is always small enough.
 */
#define BUCKET_TO_PAGE(H, B) \
((B) + 1 + ((B) ? (H)->hdr->spares[__db_log2((B)+1)-1] : 0))
#define PGNO_OF(H, S, O) (BUCKET_TO_PAGE((H), (1 << (S)) - 1) + (O))
/* Constraints about number of pages and how much data goes on a page. */
#define MAX_PAGES(H) UINT32_T_MAX
#define MINFILL 0.25
#define ISBIG(H, N) (((N) > ((H)->hdr->pagesize * MINFILL)) ? 1 : 0)
/* Shorthands for accessing structure */
#define NDX_INVALID 0xFFFF
#define BUCKET_INVALID 0xFFFFFFFF
/* On page duplicates are stored as a string of size-data-size triples. */
#define DUP_SIZE(len) ((len) + 2 * sizeof(db_indx_t))
/*
 * Log messages types (these are subtypes within a record type)
 *
 * A log opcode is one of the operation codes below (multiples of 0x10)
 * OR'ed with the two low "pair" bits:
 *	PAIR_KEYMASK	tested by PAIR_ISKEYBIG()
 *	PAIR_DATAMASK	tested by PAIR_ISDATABIG()
 * OPCODE_OF() strips the pair bits and returns the bare operation code.
 *
 * The macro argument is parenthesised so that callers may pass compound
 * expressions such as `op | PAIR_KEYMASK'; without the parentheses `&' would
 * bind tighter than `|' and give the wrong answer.
 */
#define PAIR_KEYMASK 0x1
#define PAIR_DATAMASK 0x2
#define PAIR_ISKEYBIG(N) ((N) & PAIR_KEYMASK)
#define PAIR_ISDATABIG(N) ((N) & PAIR_DATAMASK)
#define OPCODE_OF(N) ((N) & ~(PAIR_KEYMASK | PAIR_DATAMASK))
#define PUTPAIR 0x20
#define DELPAIR 0x30
#define PUTOVFL 0x40
#define DELOVFL 0x50
#define ALLOCPGNO 0x60
#define DELPGNO 0x70
#define SPLITOLD 0x80
#define SPLITNEW 0x90
#include "hash_auto.h"
#include "hash_ext.h"
#include "db_am.h"
#include "common_ext.h"

114
db2/include/hash_auto.h Normal file
View File

@ -0,0 +1,114 @@
/* Do not edit: automatically built by dist/db_gen.sh. */
#ifndef ham_AUTO_H
#define ham_AUTO_H
/*
 * ham_insdel log record: type code (offset 1 from DB_ham_BEGIN) and the
 * structure holding its arguments.  It carries an opcode, a file id, a
 * page number and index with that page's LSN, and key and data DBTs.
 */
#define DB_ham_insdel (DB_ham_BEGIN + 1)
typedef struct _ham_insdel_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t opcode;
u_int32_t fileid;
db_pgno_t pgno;
u_int32_t ndx;
DB_LSN pagelsn;
DBT key;
DBT data;
} __ham_insdel_args;
/*
 * ham_newpage log record: type code (offset 2 from DB_ham_BEGIN) and the
 * structure holding its arguments.  It carries an opcode, a file id, and
 * three page number / LSN pairs (previous, new and next page).
 */
#define DB_ham_newpage (DB_ham_BEGIN + 2)
typedef struct _ham_newpage_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t opcode;
u_int32_t fileid;
db_pgno_t prev_pgno;
DB_LSN prevlsn;
db_pgno_t new_pgno;
DB_LSN pagelsn;
db_pgno_t next_pgno;
DB_LSN nextlsn;
} __ham_newpage_args;
/*
 * ham_splitmeta log record: type code (offset 3 from DB_ham_BEGIN) and the
 * structure holding its arguments.  It carries a file id, bucket, ovflpoint
 * and spares values, and the metadata page LSN.  Unlike the insdel/newpage
 * records it has no opcode member.
 */
#define DB_ham_splitmeta (DB_ham_BEGIN + 3)
typedef struct _ham_splitmeta_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
u_int32_t bucket;
u_int32_t ovflpoint;
u_int32_t spares;
DB_LSN metalsn;
} __ham_splitmeta_args;
/*
 * ham_splitdata log record: type code (offset 4 from DB_ham_BEGIN) and the
 * structure holding its arguments.  It carries a file id, an opcode, a page
 * number, a page image DBT and the page LSN.  Note that fileid precedes
 * opcode here, the reverse of the insdel/newpage records.
 */
#define DB_ham_splitdata (DB_ham_BEGIN + 4)
typedef struct _ham_splitdata_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
u_int32_t opcode;
db_pgno_t pgno;
DBT pageimage;
DB_LSN pagelsn;
} __ham_splitdata_args;
/*
 * ham_replace log record: type code (offset 5 from DB_ham_BEGIN) and the
 * structure holding its arguments.  It carries a file id, page number and
 * index, the page LSN, a signed offset, the old and new item DBTs, and a
 * makedup value.
 */
#define DB_ham_replace (DB_ham_BEGIN + 5)
typedef struct _ham_replace_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t pgno;
u_int32_t ndx;
DB_LSN pagelsn;
int32_t off;
DBT olditem;
DBT newitem;
u_int32_t makedup;
} __ham_replace_args;
/*
 * ham_newpgno log record: type code (offset 6 from DB_ham_BEGIN) and the
 * structure holding its arguments.  It carries an opcode, a file id, a page
 * number and a free page number, old/new type values with an old page
 * number, and the page and metadata LSNs.
 */
#define DB_ham_newpgno (DB_ham_BEGIN + 6)
typedef struct _ham_newpgno_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t opcode;
u_int32_t fileid;
db_pgno_t pgno;
db_pgno_t free_pgno;
u_int32_t old_type;
db_pgno_t old_pgno;
u_int32_t new_type;
DB_LSN pagelsn;
DB_LSN metalsn;
} __ham_newpgno_args;
/*
 * ham_ovfl log record: type code (offset 7 from DB_ham_BEGIN) and the
 * structure holding its arguments.  It carries a file id, a starting page
 * number and page count, a free page number, and the metadata LSN.
 */
#define DB_ham_ovfl (DB_ham_BEGIN + 7)
typedef struct _ham_ovfl_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t fileid;
db_pgno_t start_pgno;
u_int32_t npages;
db_pgno_t free_pgno;
DB_LSN metalsn;
} __ham_ovfl_args;
#endif

120
db2/include/hash_ext.h Normal file
View File

@ -0,0 +1,120 @@
/* Do not edit: automatically built by dist/distrib. */
/*
 * Hash access method entry points and helpers.  Only the prototypes are
 * visible here; the descriptions are limited to what the signatures show.
 */
/* Open/close on a DB handle; both return int. */
int __ham_open __P((DB *, DB_INFO *));
int __ham_close __P((DB *));
int __ham_expand_table __P((HTAB *));
/* Returns a u_int32_t computed from a byte buffer and an int32_t argument. */
u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
void __ham_c_update __P((HTAB *,
HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
/* Same name H_GETHANDLE passes to __db_gethandle. */
int __ham_hdup __P((DB *, DB *));
/*
 * For each hash log record type there is a _log, a _print and a _read
 * routine.  The _log parameter lists begin with (DB_LOG *, DB_TXN *,
 * DB_LSN *, u_int32_t) and continue with the types of that record's
 * *_args structure members.  The _read routines take a void * and return a
 * pointer to the matching *_args structure through their second argument.
 * All of the _print routines share one signature.
 */
/* ham_insdel */
int __ham_insdel_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, u_int32_t, db_pgno_t, u_int32_t,
DB_LSN *, DBT *, DBT *));
int __ham_insdel_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_insdel_read __P((void *, __ham_insdel_args **));
/* ham_newpage */
int __ham_newpage_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, u_int32_t, db_pgno_t, DB_LSN *,
db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *));
int __ham_newpage_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_newpage_read __P((void *, __ham_newpage_args **));
/* ham_splitmeta */
int __ham_splitmeta_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, u_int32_t, u_int32_t, u_int32_t,
DB_LSN *));
int __ham_splitmeta_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_splitmeta_read __P((void *, __ham_splitmeta_args **));
/* ham_splitdata */
int __ham_splitdata_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, u_int32_t, db_pgno_t, DBT *,
DB_LSN *));
int __ham_splitdata_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_splitdata_read __P((void *, __ham_splitdata_args **));
/* ham_replace */
int __ham_replace_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, u_int32_t, DB_LSN *,
int32_t, DBT *, DBT *, u_int32_t));
int __ham_replace_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_replace_read __P((void *, __ham_replace_args **));
/* ham_newpgno */
int __ham_newpgno_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, u_int32_t, db_pgno_t, db_pgno_t,
u_int32_t, db_pgno_t, u_int32_t, DB_LSN *,
DB_LSN *));
int __ham_newpgno_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_newpgno_read __P((void *, __ham_newpgno_args **));
/* ham_ovfl */
int __ham_ovfl_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t, db_pgno_t, u_int32_t, db_pgno_t,
DB_LSN *));
int __ham_ovfl_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_ovfl_read __P((void *, __ham_ovfl_args **));
/*
 * Per-environment initialisation; presumably these register the _print and
 * _recover routines with the environment -- confirm in the implementation.
 */
int __ham_init_print __P((DB_ENV *));
int __ham_init_recover __P((DB_ENV *));
/*
 * Page-in/page-out routines.  Their signature (db_pgno_t, void *, DBT *)
 * matches the pgin/pgout function pointers of struct __db_mpreg in mp.h.
 */
int __ham_pgin __P((db_pgno_t, void *, DBT *));
int __ham_pgout __P((db_pgno_t, void *, DBT *));
int __ham_mswap __P((void *));
/* Declared only when DEBUG is defined. */
#ifdef DEBUG
void __ham_dump_bucket __P((HTAB *, u_int32_t));
#endif
/* Duplicate handling. */
int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int));
void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
/*
 * Hash functions.  All share the signature of the `hash' member of HTAB:
 * (const void *, u_int32_t) returning u_int32_t.
 */
u_int32_t __ham_func2 __P((const void *, u_int32_t));
u_int32_t __ham_func3 __P((const void *, u_int32_t));
u_int32_t __ham_func4 __P((const void *, u_int32_t));
u_int32_t __ham_func5 __P((const void *, u_int32_t));
/*
 * Cursor item routines operating on an (HTAB *, HASH_CURSOR *) pair; the
 * variants that take a db_lockmode_t receive a lock mode as the last
 * argument.
 */
int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
int __ham_item_reset __P((HTAB *, HASH_CURSOR *));
void __ham_item_init __P((HASH_CURSOR *));
int __ham_item_done __P((HTAB *, HASH_CURSOR *, int));
int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
/* Key/data pair manipulation on pages. */
void __ham_putitem __P((PAGE *p, const DBT *, int));
int __ham_del_pair __P((HTAB *, HASH_CURSOR *));
int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
int32_t, DBT *));
int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t));
int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *,
int));
void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *));
/*
 * Page allocation, retrieval and release.  __ham_get_page and
 * __ham_put_page are the routines the GET_META/RELEASE_META macros in
 * hash.h call to fetch and return the metadata page.
 */
int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **));
int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **));
int __ham_del_page __P((DB *, PAGE *));
int __ham_put_page __P((DB *, PAGE *, int32_t));
int __ham_dirty_page __P((HTAB *, PAGE *));
int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
int __ham_overflow_page __P((DB *, u_int32_t, PAGE **));
/* Declared only when DEBUG is defined. */
#ifdef DEBUG
int bucket_to_page __P((HTAB *, int));
#endif
void __ham_init_ovflpages __P((HTAB *));
/* Cursor page helpers. */
int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t,
int, int));
void __ham_dpair __P((DB *, PAGE *, u_int32_t));
/*
 * One _recover routine per hash log record type.  All share the signature
 * (DB_LOG *, DBT *, DB_LSN *, int, void *), the same as the _print
 * routines.
 */
int __ham_insdel_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_newpage_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_replace_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_newpgno_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_splitmeta_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_splitdata_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __ham_ovfl_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/* Takes a DB handle and a stdio stream. */
int __ham_stat __P((DB *, FILE *));

194
db2/include/lock.h Normal file
View File

@ -0,0 +1,194 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)lock.h 10.7 (Sleepycat) 7/29/97
*/
/* Forward typedef; struct __db_lockobj is defined later in this header. */
typedef struct __db_lockobj DB_LOCKOBJ;
/* Default file name for the lock region. */
#define DB_DEFAULT_LOCK_FILE "__db_lock.share"
/* NOTE(review): presumably the default number of locks -- confirm. */
#define DB_LOCK_DEFAULT_N 5000
/* Largest locker id value: 2^31 - 1. */
#define DB_LOCK_MAXID 0x7fffffff
/*
* The lock region consists of:
* The DB_LOCKREGION structure (sizeof(DB_LOCKREGION)).
* The conflict matrix of nmodes * nmodes bytes (nmodes * nmodes).
* The hash table for object lookup (hashsize * sizeof(DB_OBJ *)).
* The locks themselves (maxlocks * sizeof(struct __db_lock)).
* The objects being locked (maxlocks * sizeof(DB_OBJ)).
* String space to represent the DBTs that are the objects being locked.
*/
/*
 * DB_LOCKREGION --
 *	Header of the shared lock region: identification (magic, version),
 *	the id generator, deadlock-detector flags, the free lists of locks
 *	and objects, table sizing, offsets of the hash table and memory
 *	area, and statistics counters.
 */
struct __db_lockregion {
RLAYOUT hdr; /* Shared region header. */
u_int32_t magic; /* lock magic number */
u_int32_t version; /* version number */
u_int32_t id; /* unique id generator */
u_int32_t need_dd; /* flag for deadlock detector */
u_int32_t detect; /* run dd on every conflict */
SH_TAILQ_HEAD(lock_header) free_locks; /* free lock header */
SH_TAILQ_HEAD(obj_header) free_objs; /* free obj header */
u_int32_t maxlocks; /* maximum number of locks in table */
u_int32_t table_size; /* size of hash table */
u_int32_t nmodes; /* number of lock modes */
u_int32_t numobjs; /* number of objects */
u_int32_t nlockers; /* number of lockers */
size_t increment; /* how much to grow region */
size_t hash_off; /* offset of hash table */
size_t mem_off; /* offset of memory region */
size_t mem_bytes; /* number of bytes in memory region */
/* Statistics. */
u_int32_t nconflicts; /* number of lock conflicts */
u_int32_t nrequests; /* number of lock gets */
u_int32_t nreleases; /* number of lock puts */
u_int32_t ndeadlocks; /* number of deadlocks */
};
/*
 * Macros to lock/unlock the region.
 *
 * Both operate on the mutex in the region header (region->hdr.lock) using
 * the lock table's fd.  LOCK_LOCKREGION also passes the environment's
 * db_yield function, or NULL when the table has no environment.  Return
 * values of the mutex calls are discarded.
 */
#define LOCK_LOCKREGION(lt) \
(void)__db_mutex_lock(&(lt)->region->hdr.lock,(lt)->fd, \
(lt)->dbenv == NULL ? NULL : (lt)->dbenv->db_yield)
#define UNLOCK_LOCKREGION(lt) \
(void)__db_mutex_unlock(&(lt)->region->hdr.lock, (lt)->fd)
/*
 * Since we will be keeping DBTs in shared memory, we need the equivalent
 * of a DBT that will work in shared memory.
 *
 * Instead of a pointer, SH_DBT stores a signed byte offset; SH_DBT_PTR
 * turns it back into an address by adding `off' to the address of the
 * SH_DBT itself (p must therefore point at the SH_DBT).
 */
typedef struct __sh_dbt {
u_int32_t size;
ssize_t off;
} SH_DBT;
#define SH_DBT_PTR(p) ((void *)(((u_int8_t *)(p)) + (p)->off))
/*
 * The lock table is the per-process cookie returned from a lock_open call.
 *
 * NOTE(review): the sentence above appears to describe struct __db_locktab
 * (defined further down) rather than the structure that follows -- confirm.
 *
 * DB_LOCKOBJ --
 *	An entry that represents either a locked object or a locker, as
 *	recorded in `type' (DB_LOCK_OBJTYPE or DB_LOCK_LOCKER).  The two
 *	unions overlay alternative members; the dd_id/waiters/holders/heldby
 *	macros after the structure give short names for reaching them.
 */
struct __db_lockobj {
SH_DBT lockobj; /* Identifies object locked. */
SH_TAILQ_ENTRY links; /* Links for free list. */
union {
SH_TAILQ_HEAD(_wait) _waiters; /* List of waiting locks. */
u_int32_t _dd_id; /* Deadlock detector id. */
} wlinks;
union {
SH_LIST_HEAD(_held) _heldby; /* Locks held by this locker. */
SH_TAILQ_HEAD(_hold) _holders; /* List of held locks. */
} dlinks;
#define DB_LOCK_OBJTYPE 1
#define DB_LOCK_LOCKER 2
u_int8_t type; /* Real object or locker id. */
};
/*
 * Shorthands for the union members.  These are plain object-like macros, so
 * the identifiers dd_id, waiters, holders and heldby are rewritten wherever
 * they appear after this point.
 */
#define dd_id wlinks._dd_id
#define waiters wlinks._waiters
#define holders dlinks._holders
#define heldby dlinks._heldby
/*
 * DB_LOCKTAB --
 *	Per-process view of the lock region: the environment, the mapped
 *	file descriptor, the address and last known size of the shared
 *	region, and pointers to the hash table, string space and conflict
 *	matrix.
 */
struct __db_locktab {
DB_ENV *dbenv; /* Environment. */
int fd; /* mapped file descriptor */
DB_LOCKREGION *region; /* address of shared memory region */
DB_HASHTAB *hashtab; /* Beginning of hash table. */
size_t reg_size; /* last known size of lock region */
void *mem; /* Beginning of string space. */
u_int8_t *conflicts; /* Pointer to conflict matrix. */
};
/*
 * Test for conflicts.
 *
 * CONFLICTS(T, HELD, WANTED) selects the entry of T's conflict matrix in
 * row HELD, column WANTED.  The matrix is stored row-major with
 * T->region->nmodes entries per row.  The result is an lvalue of the
 * matrix's element type.
 *
 * Every argument and the whole expansion are parenthesised so that compound
 * arguments (`mode + 1', `&table') index the intended element.
 */
#define CONFLICTS(T, HELD, WANTED) \
	((T)->conflicts[(HELD) * (T)->region->nmodes + (WANTED)])
/*
 * Status of a lock.
 *
 * Stored in the `status' member of struct __db_lock.  The enumerators have
 * no explicit values, so they number from 0 in the order listed.
 */
typedef enum {
DB_LSTAT_ABORTED, /* Lock belongs to an aborted txn. */
DB_LSTAT_ERR, /* Lock is bad. */
DB_LSTAT_FREE, /* Lock is unallocated. */
DB_LSTAT_HELD, /* Lock is currently held. */
DB_LSTAT_NOGRANT, /* Lock was not granted. */
DB_LSTAT_PENDING, /* Lock was waiting and has been
* promoted; waiting for the owner
* to run and upgrade it to held. */
DB_LSTAT_WAITING /* Lock is on the wait queue. */
} db_status_t;
/*
 * Resources in the lock region. Used to indicate which resource
 * is running low when we need to grow the region.
 *
 * The three values name the memory (string) space, the object table and
 * the lock table, judging by the enumerator names.
 */
typedef enum {
DB_LOCK_MEM, DB_LOCK_OBJ, DB_LOCK_LOCK
} db_resource_t;
/*
 * struct __db_lock --
 *	A single lock kept in the shared region: the mutex used to wait on
 *	it, its holder, list linkage, reference count, mode, the offset of
 *	the object it applies to, and its current status.
 */
struct __db_lock {
/*
* Wait on mutex to wait on lock. You reference your own mutex with
* ID 0 and others reference your mutex with ID 1.
*/
db_mutex_t mutex;
u_int32_t holder; /* Who holds this lock. */
SH_TAILQ_ENTRY links; /* Free or holder/waiter list. */
SH_LIST_ENTRY locker_links; /* List of locks held by a locker. */
u_int32_t refcount; /* Reference count the lock. */
db_lockmode_t mode; /* What sort of lock. */
ssize_t obj; /* Relative offset of object struct. */
db_status_t status; /* Status of this lock. */
};
/*
 * We cannot return pointers to the user (else we cannot easily grow regions),
 * so we return offsets in the region. These must be converted to and from
 * regular pointers. Always use the macros below.
 *
 * OFFSET_TO_LOCK / OFFSET_TO_OBJ add a byte offset to the start of the
 * region ((lt)->region) and cast the result to a lock or object pointer.
 * LOCK_TO_OFFSET / OBJ_TO_OFFSET do the reverse: the byte distance from the
 * start of the region to the given pointer, as a size_t.
 *
 * `lt' is parenthesised in all four macros so that any pointer-valued
 * expression (e.g. `&table') may be passed.
 */
#define OFFSET_TO_LOCK(lt, off) \
	((struct __db_lock *)((u_int8_t *)((lt)->region) + (off)))
#define LOCK_TO_OFFSET(lt, lock) \
	((size_t)((u_int8_t *)(lock) - (u_int8_t *)((lt)->region)))
#define OFFSET_TO_OBJ(lt, off) \
	((DB_LOCKOBJ *)((u_int8_t *)((lt)->region) + (off)))
#define OBJ_TO_OFFSET(lt, obj) \
	((size_t)((u_int8_t *)(obj) - (u_int8_t *)((lt)->region)))
/*
 * The lock header contains the region structure and the conflict matrix.
 * Aligned to a large boundary because we don't know what the underlying
 * type of the hash table elements are.
 *
 * LOCK_HEADER_SIZE(M): size of DB_LOCKREGION plus an M-by-M byte conflict
 * matrix rounded up to LOCK_HASH_ALIGN.
 */
#define LOCK_HASH_ALIGN 8
#define LOCK_HEADER_SIZE(M) \
	((size_t)(sizeof(DB_LOCKREGION) + \
	ALIGN(((M) * (M)), LOCK_HASH_ALIGN)))
/*
 * For the full region, we need to add the locks, the objects, the hash table
 * and the string space (which is 16 bytes per lock).
 *
 * LOCK_REGION_SIZE(M, N, H): M lock modes, N locks/objects, H hash table
 * entries.
 *
 * Macro arguments are parenthesised wherever they are used in arithmetic so
 * that expressions such as `n + 1' are multiplied as a whole.
 */
#define STRING_SIZE(N) (16 * (N))
#define LOCK_REGION_SIZE(M, N, H) \
	(ALIGN(LOCK_HEADER_SIZE(M) + \
	(H) * sizeof(DB_HASHTAB), MUTEX_ALIGNMENT) + \
	(N) * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) + \
	ALIGN((N) * sizeof(DB_LOCKOBJ), sizeof(size_t)) + \
	ALIGN(STRING_SIZE(N), sizeof(size_t)))
/*
 * Debugging flag bits, defined only when DEBUG is set.  LOCK_DEBUG_ALL
 * (0xFFFF) covers every bit in the low 16, which includes the
 * LOCK_DEBUG_NOMUTEX bit (0x0100) as well as the seven selector bits.
 */
#ifdef DEBUG
#define LOCK_DEBUG_LOCKERS 0x0001
#define LOCK_DEBUG_LOCK 0x0002
#define LOCK_DEBUG_OBJ 0x0004
#define LOCK_DEBUG_CONF 0x0008
#define LOCK_DEBUG_MEM 0x0010
#define LOCK_DEBUG_BUCKET 0x0020
#define LOCK_DEBUG_OBJECTS 0x0040
#define LOCK_DEBUG_ALL 0xFFFF
#define LOCK_DEBUG_NOMUTEX 0x0100
#endif
#include "lock_ext.h"

8
db2/include/lock_ext.h Normal file
View File

@ -0,0 +1,8 @@
/* Do not edit: automatically built by dist/distrib. */
/*
 * Lock-object lookup, comparison and hashing helpers.  Only prototypes are
 * visible here.
 */
/* Returns a DB_LOCKOBJ through the last argument. */
int __lock_getobj __P((DB_LOCKTAB *,
u_int32_t, DBT *, u_int32_t type, DB_LOCKOBJ **));
/* Comparison of a DBT, or of a u_int32_t locker id, against a DB_LOCKOBJ. */
int __lock_cmp __P((DBT *, DB_LOCKOBJ *));
int __lock_locker_cmp __P((u_int32_t, DB_LOCKOBJ *));
/*
 * Hash routines.  Note that __lock_ohash returns int while the other two
 * return u_int32_t.
 */
int __lock_ohash __P((DBT *));
u_int32_t __lock_locker_hash __P((u_int32_t));
u_int32_t __lock_lhash __P((DB_LOCKOBJ *));

157
db2/include/log.h Normal file
View File

@ -0,0 +1,157 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)log.h 10.8 (Sleepycat) 8/18/97
*/
#ifndef _LOG_H_
#define _LOG_H_
/*
 * Forward declarations and typedefs for the structures defined later in
 * this header: FNAME, HDR, LOG and LOGP.
 */
struct __fname; typedef struct __fname FNAME;
struct __hdr; typedef struct __hdr HDR;
struct __log; typedef struct __log LOG;
struct __log_persist; typedef struct __log_persist LOGP;
/*
 * Log files are named from the LFNAME template, which formats an integer
 * with at least five zero-padded digits.  MAXLFNAME (99999) is the largest
 * value that still fits in five digits.
 */
#define MAXLFNAME 99999 /* Maximum log file name. */
#define LFNAME "log.%05d" /* Log file name template. */
/* Default log name. */
#define DB_DEFAULT_LOG_FILE "__db_log.share"
#define DEFAULT_MAX (10 * 1048576) /* 10 Mb. */
/*
 * Macros to return per-process address, offsets.
 *
 * ADDR(base, offset): the address `offset' bytes past (base)->addr.
 * OFFSET(base, p): the byte distance from (base)->addr to p.
 *
 * NOTE(review): `offset' is not parenthesised in ADDR, so pass only simple
 * expressions.  mp.h defines ADDR and OFFSET with the same token sequence;
 * the two definitions must be changed together or a translation unit that
 * includes both headers will see a conflicting redefinition.
 */
#define ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset))
#define OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr)
/*
 * Macros to lock/unlock the region and threads.
 *
 * LOCK_LOGTHREAD / UNLOCK_LOGTHREAD act on the per-process mutex in the
 * DB_LOG handle, and only when the handle has DB_AM_THREAD set; they pass
 * -1 as the file descriptor.  LOCK_LOGREGION / UNLOCK_LOGREGION always act
 * on the lock member of the shared region header, reached by casting
 * (dblp)->lp to RLAYOUT *, using the region fd.  The lock variants pass
 * the environment's db_yield function, or NULL without an environment.
 *
 * NOTE(review): the two *_LOGTHREAD macros expand to a bare `if' statement
 * and must not be used as the unbraced body of an `if' that has an `else'.
 * UNLOCK_LOGTHREAD additionally ends in its own `;', unlike the other
 * three, so `UNLOCK_LOGTHREAD(x);' yields an extra empty statement.
 */
#define LOCK_LOGTHREAD(dblp) \
if (F_ISSET(dblp, DB_AM_THREAD)) \
(void)__db_mutex_lock(&(dblp)->mutex, -1, \
(dblp)->dbenv == NULL ? NULL : (dblp)->dbenv->db_yield)
#define UNLOCK_LOGTHREAD(dblp) \
if (F_ISSET(dblp, DB_AM_THREAD)) \
(void)__db_mutex_unlock(&(dblp)->mutex, -1);
#define LOCK_LOGREGION(dblp) \
(void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock, \
(dblp)->fd, (dblp)->dbenv == NULL ? NULL : (dblp)->dbenv->db_yield)
#define UNLOCK_LOGREGION(dblp) \
(void)__db_mutex_unlock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd)
/*
 * The per-process table that maps log file-id's to DB structures.
 *
 * DB_ENTRY is one slot of that table; struct __db_log holds the table in
 * its `dbentry' member.
 */
typedef struct __db_entry {
DB *dbp; /* Associated DB structure. */
int refcount; /* Reference counted. */
int deleted; /* File was not found during open. */
} DB_ENTRY;
/*
 * DB_LOG
 * Per-process log structure.
 *
 * The members fall into three groups, marked by the comments inside the
 * structure: those needing the thread mutex, those covered by the region
 * lock (or used only without threads), and those that are unprotected.
 */
struct __db_log {
/* These fields need to be protected for multi-threaded support. */
db_mutex_t mutex; /* Mutex for thread protection. */
DB_ENTRY *dbentry; /* Recovery file-id mapping. */
#define DB_GROW_SIZE 64
u_int32_t dbentry_cnt; /* Entries. Grows by DB_GROW_SIZE. */
/*
* These fields are always accessed while the region lock is held, so they do
* not have to be protected by the thread lock as well OR, they are only used
* when threads are not being used, i.e. most cursor operations are disallowed
* on threaded logs.
*/
u_int32_t lfname; /* Log file "name". */
int lfd; /* Log file descriptor. */
DB_LSN c_lsn; /* Cursor: current LSN. */
DBT c_dbt; /* Cursor: return DBT structure. */
int c_fd; /* Cursor: file descriptor. */
u_int32_t c_off; /* Cursor: previous record offset. */
u_int32_t c_len; /* Cursor: current record length. */
/* These fields are not protected. */
LOG *lp; /* Address of the shared LOG. */
DB_ENV *dbenv; /* Reference to error information. */
void *maddr; /* Address of mmap'd region. */
void *addr; /* Address of shalloc() region. */
int fd; /* Region file descriptor. */
u_int32_t flags; /* Support the DB_AM_XXX flags. */
};
/*
 * HDR --
 * Log record header.
 *
 * Three 32-bit values: the previous offset, a checksum and a length.
 */
struct __hdr {
u_int32_t prev; /* Previous offset. */
u_int32_t cksum; /* Current checksum. */
u_int32_t len; /* Current length. */
};
/*
 * LOGP --
 *	Persistent log information; embedded in the shared LOG structure as
 *	its `persist' member.
 */
struct __log_persist {
u_int32_t magic; /* DB_LOGMAGIC */
u_int32_t version; /* DB_LOGVERSION */
u_int32_t lg_max; /* Maximum file size. */
int mode; /* Log file mode. */
};
/*
 * LOG --
 * Shared log region. One of these is allocated in shared memory,
 * and describes the log.
 *
 * The structure begins with an RLAYOUT; LOCK_LOGREGION relies on this when
 * it casts the LOG pointer to RLAYOUT *.  The log buffer is a fixed 4KB
 * array embedded at the end of the structure.
 */
struct __log {
RLAYOUT rlayout; /* General region information. */
LOGP persist; /* Persistent information. */
SH_TAILQ_HEAD(__fq) fq; /* List of file names. */
DB_LSN lsn; /* LSN at current file offset. */
DB_LSN c_lsn; /* LSN of the last checkpoint. */
DB_LSN s_lsn; /* LSN of the last sync. */
DB_LSN span_lsn; /* LSN spanning buffer write. */
u_int32_t len; /* Length of the last record. */
size_t b_off; /* Current offset in the buffer. */
u_int32_t w_off; /* Current write offset in the file. */
time_t chkpt; /* Time of the last checkpoint. */
u_int32_t written; /* Bytes written since checkpoint. */
u_int8_t buf[4 * 1024]; /* Log buffer. */
};
/*
 * FNAME --
 * File name and id.
 *
 * Entries are linked on a shared-memory tail queue through `q'.  The file
 * id and the name are recorded as offsets (fileid_off, name_off) rather
 * than pointers.
 */
struct __fname {
SH_TAILQ_ENTRY q; /* File name queue. */
u_int16_t ref; /* Reference count. */
u_int32_t id; /* Logging file id. */
DBTYPE s_type; /* Saved DB type. */
u_int32_t fileid_off; /* Unique file id offset. */
size_t name_off; /* Name offset. */
};
#include "log_auto.h"
#include "log_ext.h"
#endif /* _LOG_H_ */

27
db2/include/log_auto.h Normal file
View File

@ -0,0 +1,27 @@
/* Do not edit: automatically built by dist/db_gen.sh. */
#ifndef log_AUTO_H
#define log_AUTO_H
/*
 * log_register log record: type code (offset 1 from DB_log_BEGIN) and the
 * structure holding its arguments.  It carries name and uid DBTs, an id and
 * a DBTYPE.
 */
#define DB_log_register (DB_log_BEGIN + 1)
typedef struct _log_register_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
DBT name;
DBT uid;
u_int32_t id;
DBTYPE ftype;
} __log_register_args;
/*
 * log_unregister log record: type code (offset 2 from DB_log_BEGIN) and the
 * structure holding its arguments.  Its only payload is an id.
 */
#define DB_log_unregister (DB_log_BEGIN + 2)
typedef struct _log_unregister_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t id;
} __log_unregister_args;
#endif

29
db2/include/log_ext.h Normal file
View File

@ -0,0 +1,29 @@
/* Do not edit: automatically built by dist/distrib. */
/*
 * Internal log subsystem routines.  Only prototypes are visible here; the
 * descriptions are limited to what the signatures show.
 */
int __log_find __P((DB_ENV *, LOG *, int *));
int __log_valid __P((DB_ENV *, LOG *, int));
/*
 * _log, _print and _read routines for the log_register and log_unregister
 * records.  The _read routines return a pointer to the matching *_args
 * structure through their second argument.
 */
int __log_register_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
DBT *, DBT *, u_int32_t, DBTYPE));
int __log_register_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __log_register_read __P((void *, __log_register_args **));
int __log_unregister_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t));
int __log_unregister_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __log_unregister_read __P((void *, __log_unregister_args **));
int __log_init_print __P((DB_ENV *));
int __log_init_recover __P((DB_ENV *));
int __log_findckp __P((DB_LOG *, DB_LSN *));
/* Record get/put by LSN. */
int __log_get __P((DB_LOG *, DB_LSN *, DBT *, int, int));
int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
/* Returns a string through the char ** argument. */
int __log_name __P((DB_ENV *, int, char **));
/* _recover routines; same signature as the _print routines. */
int __log_register_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __log_unregister_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
/*
 * File-id table maintenance (see DB_ENTRY in log.h); __db_fileid_to_db
 * returns a DB handle through its second argument.
 */
int __log_add_logid __P((DB_LOG *, DB *, u_int32_t));
int __db_fileid_to_db __P((DB_LOG *, DB **, u_int32_t));
void __log_close_files __P((DB_LOG *));
void __log_rem_logid __P((DB_LOG *, u_int32_t));

266
db2/include/mp.h Normal file
View File

@ -0,0 +1,266 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)mp.h 10.14 (Sleepycat) 8/18/97
*/
/*
 * Forward declarations and typedefs for the structures defined later in
 * this header: BH, DB_MPREG, MPOOL and MPOOLFILE.
 */
struct __bh; typedef struct __bh BH;
struct __db_mpreg; typedef struct __db_mpreg DB_MPREG;
struct __mpool; typedef struct __mpool MPOOL;
struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE;
/* Default mpool name. */
#define DB_DEFAULT_MPOOL_FILE "__db_mpool.share"
/*
 * We default to 128K (16 8K pages) if the user doesn't specify, and
 * require a minimum of 20K.
 */
#define DB_CACHESIZE_DEF (128 * 1024)
#define DB_CACHESIZE_MIN ( 20 * 1024)
/*
 * Macro to return per-process address, offsets.
 *
 * ADDR(base, offset): the address `offset' bytes past (base)->addr.
 * OFFSET(base, p): the byte distance from (base)->addr to p.
 *
 * NOTE(review): `offset' is not parenthesised in ADDR, so pass only simple
 * expressions.  log.h defines ADDR and OFFSET with the same token sequence;
 * the two definitions must be changed together or a translation unit that
 * includes both headers will see a conflicting redefinition.
 */
#define ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset))
#define OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr)
#define INVALID 0 /* Invalid shared memory offset. */
#define TEMPORARY "<tmp>" /* Temporary file name. */
/*
* There are two kinds of locks in the mpool code. The first is the region
* lock, used to serialize modifications to all data structures. The second
* is a per-buffer header lock. The locking order is as follows:
*
* Process searching for a buffer:
* Acquire the region lock.
* Find the buffer header.
* Increment the reference count (guarantee the buffer stays).
* If the BH_LOCKED flag is set:
* Release the region lock.
* Acquire the buffer lock.
* Release the buffer lock.
* Acquire the region lock.
* Return the buffer.
*
* Process reading/writing a buffer:
* Acquire the region lock.
* Find/create the buffer header.
* If reading, increment the reference count (guarantee the buffer stays).
* Set the BH_LOCKED flag.
* Acquire the buffer lock (guaranteed not to block).
* Release the region lock.
* Do the I/O and/or initialize buffer contents.
* Acquire the region lock.
* Clear the BH_LOCKED flag.
* Release the region lock.
* Release the buffer lock.
* If reading, return the buffer.
*
* Pointers to DB_MPOOL, MPOOL, DB_MPOOLFILE and MPOOLFILE structures are not
* reacquired when a region lock is reacquired because they couldn't have been
* closed/discarded and because they never move in memory.
*/
/*
 * Locking macros.  Each one tests flags on the DB_MPOOL handle and does
 * nothing when the flag is clear:
 *	LOCKINIT		MP_LOCKHANDLE or MP_LOCKREGION
 *	LOCKHANDLE/UNLOCKHANDLE	MP_LOCKHANDLE
 *	LOCKREGION/UNLOCKREGION	MP_LOCKREGION (the lock member of the shared
 *				region header, via a cast to RLAYOUT *)
 *	LOCKBUFFER/UNLOCKBUFFER	MP_LOCKREGION (the buffer header's mutex)
 * The lock variants pass the environment's db_yield function, or NULL when
 * there is no environment.  Return values are discarded.
 *
 * Every macro expands to a bare `if' statement without a trailing
 * semicolon, so none of them may be used as the unbraced body of an `if'
 * that has its own `else'.
 *
 * NOTE(review): LOCKINIT passes (dbmp)->fd as the second argument of
 * __db_mutex_init, which mutex_ext.h declares as an off_t rather than a
 * file descriptor -- confirm this is intended.
 */
#define LOCKINIT(dbmp, mutexp) \
if (F_ISSET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION)) \
(void)__db_mutex_init(mutexp, (dbmp)->fd)
#define LOCKHANDLE(dbmp, mutexp) \
if (F_ISSET(dbmp, MP_LOCKHANDLE)) \
(void)__db_mutex_lock(mutexp, (dbmp)->fd, \
(dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield)
#define UNLOCKHANDLE(dbmp, mutexp) \
if (F_ISSET(dbmp, MP_LOCKHANDLE)) \
(void)__db_mutex_unlock(mutexp, (dbmp)->fd)
#define LOCKREGION(dbmp) \
if (F_ISSET(dbmp, MP_LOCKREGION)) \
(void)__db_mutex_lock(&((RLAYOUT *)(dbmp)->mp)->lock, \
(dbmp)->fd, \
(dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield)
#define UNLOCKREGION(dbmp) \
if (F_ISSET(dbmp, MP_LOCKREGION)) \
(void)__db_mutex_unlock(&((RLAYOUT *)(dbmp)->mp)->lock, \
(dbmp)->fd)
#define LOCKBUFFER(dbmp, bhp) \
if (F_ISSET(dbmp, MP_LOCKREGION)) \
(void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->fd, \
(dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield)
#define UNLOCKBUFFER(dbmp, bhp) \
if (F_ISSET(dbmp, MP_LOCKREGION)) \
(void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->fd)
/*
 * DB_MPOOL --
 * Per-process memory pool structure.
 *
 * Holds the registry of pgin/pgout routines, the list of open
 * DB_MPOOLFILEs, and the addresses needed to reach the shared MPOOL
 * region.  The MP_* bits defined inside the structure are the values of
 * its `flags' member and are the ones tested by the locking macros.
 */
struct __db_mpool {
/* These fields need to be protected for multi-threaded support. */
db_mutex_t mutex; /* Structure lock. */
/* List of pgin/pgout routines. */
LIST_HEAD(__db_mpregh, __db_mpreg) dbregq;
/* List of DB_MPOOLFILE's. */
TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq;
/* These fields are not protected. */
DB_ENV *dbenv; /* Reference to error information. */
MPOOL *mp; /* Address of the shared MPOOL. */
void *maddr; /* Address of mmap'd region. */
void *addr; /* Address of shalloc() region. */
DB_HASHTAB *htab; /* Hash table of bucket headers. */
int fd; /* Underlying mmap'd fd. */
#define MP_ISPRIVATE 0x01 /* Private, so local memory. */
#define MP_LOCKHANDLE 0x02 /* Threaded, lock handles and region. */
#define MP_LOCKREGION 0x04 /* Concurrent access, lock region. */
u_int32_t flags;
};
/*
 * DB_MPREG --
 * DB_MPOOL registry of pgin/pgout functions.
 *
 * One list entry per file type, linked on the dbregq list of DB_MPOOL.
 * Both callbacks take (db_pgno_t, void *, DBT *) and return int.
 */
struct __db_mpreg {
LIST_ENTRY(__db_mpreg) q; /* Linked list. */
int ftype; /* File type. */
/* Pgin, pgout routines. */
int (*pgin) __P((db_pgno_t, void *, DBT *));
int (*pgout) __P((db_pgno_t, void *, DBT *));
};
/*
 * DB_MPOOLFILE --
 * Per-process DB_MPOOLFILE information.
 *
 * Ties a process-local file descriptor and path to the shared MPOOLFILE
 * (`mfp') and to the owning DB_MPOOL (`dbmp').  The MP_* bits defined
 * inside the structure are the values of its `flags' member.  Their
 * numeric values overlap the MP_* flags of DB_MPOOL, so the two sets must
 * not be mixed.
 */
struct __db_mpoolfile {
/* These fields need to be protected for multi-threaded support. */
db_mutex_t mutex; /* Structure lock. */
int fd; /* Underlying file descriptor. */
u_int32_t pinref; /* Pinned block reference count. */
/* These fields are not protected. */
TAILQ_ENTRY(__db_mpoolfile) q; /* Linked list of DB_MPOOLFILE's. */
char *path; /* Initial file path. */
DB_MPOOL *dbmp; /* Overlying DB_MPOOL. */
MPOOLFILE *mfp; /* Underlying MPOOLFILE. */
void *addr; /* Address of mmap'd region. */
size_t len; /* Length of mmap'd region. */
#define MP_PATH_ALLOC 0x01 /* Path is allocated memory. */
#define MP_PATH_TEMP 0x02 /* Backing file is a temporary. */
#define MP_READONLY 0x04 /* File is readonly. */
u_int32_t flags;
};
/*
 * MPOOL --
 * Shared memory pool region. One of these is allocated in shared
 * memory, and describes the pool.
 *
 * The structure begins with an RLAYOUT; LOCKREGION relies on this when it
 * casts the MPOOL pointer to RLAYOUT *.
 */
struct __mpool {
RLAYOUT rlayout; /* General region information. */
SH_TAILQ_HEAD(__bhq) bhq; /* LRU list of buckets. */
SH_TAILQ_HEAD(__bhfq) bhfq; /* Free buckets. */
SH_TAILQ_HEAD(__mpfq) mpfq; /* List of MPOOLFILEs. */
/*
* We make the assumption that the early pages of the file are far
* more likely to be retrieved than the later pages, which means
* that the top bits are more interesting for hashing since they're
* less likely to collide. On the other hand, since 512 4K pages
* represents a 2MB file, only the bottom 9 bits of the page number
* are likely to be set. We XOR in the offset in the MPOOL of the
* MPOOLFILE that backs this particular page, since that should also
* be unique for the page.
*/
/*
* BUCKET(mp, mf_offset, pgno): hash bucket number for a page, computed
* as (pgno XOR (mf_offset shifted left by 9)) modulo the number of
* hash table entries.
*/
#define BUCKET(mp, mf_offset, pgno) \
(((pgno) ^ ((mf_offset) << 9)) % (mp)->htab_buckets)
size_t htab; /* Hash table offset. */
size_t htab_buckets; /* Number of hash table entries. */
DB_LSN lsn; /* Maximum checkpoint LSN. */
int lsn_cnt; /* Checkpoint buffers left to write. */
DB_MPOOL_STAT stat; /* Global mpool statistics. */
#define MP_LSN_RETRY 0x01 /* Retry all BH_WRITE buffers. */
u_int32_t flags;
};
/*
 * MPOOLFILE --
 * Shared DB_MPOOLFILE information.
 *
 * Linked on a shared-memory tail queue through `q'.  The path, file id and
 * pgin/pgout cookie are recorded as offsets (the *_off members) rather
 * than pointers.
 */
struct __mpoolfile {
SH_TAILQ_ENTRY q; /* List of MPOOLFILEs */
u_int32_t ref; /* Reference count. */
int ftype; /* File type. */
int can_mmap; /* If the file can be mmap'd. */
int lsn_off; /* Page's LSN offset. */
size_t path_off; /* File name location. */
size_t fileid_off; /* File identification location. */
size_t pgcookie_len; /* Pgin/pgout cookie length. */
size_t pgcookie_off; /* Pgin/pgout cookie location. */
int lsn_cnt; /* Checkpoint buffers left to write. */
DB_MPOOL_FSTAT stat; /* Per-file mpool statistics. */
};
/*
 * BH --
 * Buffer header.
 *
 * Precedes the page data of each cached buffer.  `buf' is declared with
 * one element but holds variable-length data, so sizeof(BH) already counts
 * one byte of it.  The BH_* bits are the values of the 16-bit `flags'
 * member.
 */
struct __bh {
db_mutex_t mutex; /* Structure lock. */
u_int16_t ref; /* Reference count. */
#define BH_CALLPGIN 0x001 /* Page needs to be reworked... */
#define BH_DIRTY 0x002 /* Page was modified. */
#define BH_DISCARD 0x004 /* Page is useless. */
#define BH_LOCKED 0x008 /* Page is locked (I/O in progress). */
#define BH_TRASH 0x010 /* Page is garbage. */
#define BH_WRITE 0x020 /* Page scheduled for writing. */
u_int16_t flags;
SH_TAILQ_ENTRY q; /* LRU list of bucket headers. */
SH_TAILQ_ENTRY mq; /* MPOOLFILE list of bucket headers. */
db_pgno_t pgno; /* Underlying MPOOLFILE page number. */
size_t mf_offset; /* Associated MPOOLFILE offset. */
/*
* !!!
* This array must be size_t aligned -- the DB access methods put PAGE
* and other structures into it, and expect to be able to access them
* directly. (We guarantee size_t alignment in the db_mpool(3) manual
* page as well.)
*/
u_int8_t buf[1]; /* Variable length data. */
};
#include "mp_ext.h"

14
db2/include/mp_ext.h Normal file
View File

@ -0,0 +1,14 @@
/* Do not edit: automatically built by dist/distrib. */
int __memp_bhwrite
__P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *));
int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *));
int __memp_pg __P((DB_MPOOLFILE *, BH *, int));
void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int));
int __memp_fopen __P((DB_MPOOL *, const char *, int, int,
int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
void __memp_debug __P((DB_MPOOL *, FILE *, int));
int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *));
int __memp_ropen
__P((DB_MPOOL *, const char *, size_t, int, int));
int __memp_rclose __P((DB_MPOOL *));

4
db2/include/mutex_ext.h Normal file
View File

@ -0,0 +1,4 @@
/* Do not edit: automatically built by dist/distrib. */
void __db_mutex_init __P((db_mutex_t *, off_t));
int __db_mutex_lock __P((db_mutex_t *, int, int (*)(void)));
int __db_mutex_unlock __P((db_mutex_t *, int));

19
db2/include/os_ext.h Normal file
View File

@ -0,0 +1,19 @@
/* Do not edit: automatically built by dist/distrib. */
int __db_abspath __P((const char *));
char *__db_rpath __P((const char *));
int __db_dir __P((DB_ENV *, const char *, char ***, int *));
void __db_dirf __P((DB_ENV *, char **, int));
int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *));
int __db_lseek __P((int, size_t, db_pgno_t, u_long, int));
int __db_mmap __P((int, size_t, int, int, void *));
int __db_munmap __P((void *, size_t));
int __db_oflags __P((int));
int __db_fdopen __P((const char *, int, int, int, int *));
int __db_fsync __P((int));
int __db_close __P((int));
int __db_read __P((int, void *, size_t, ssize_t *));
int __db_write __P((int, void *, size_t, ssize_t *));
int __db_sleep __P((u_long, u_long));
int __db_exists __P((const char *, int *));
int __db_stat __P((DB_ENV *, const char *, int, off_t *, off_t *));
int __db_unlink __P((const char *));

275
db2/include/queue.h Normal file
View File

@ -0,0 +1,275 @@
/* BSDI $Id$ */
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)queue.h 8.5 (Berkeley) 8/20/94
*/
#ifndef _SYS_QUEUE_H_
#define _SYS_QUEUE_H_
/*
* This file defines three types of data structures: lists, tail queues,
* and circular queues.
*
* A list is headed by a single forward pointer (or an array of forward
* pointers for a hash table header). The elements are doubly linked
* so that an arbitrary element can be removed without a need to
* traverse the list. New elements can be added to the list before
* or after an existing element or at the head of the list. A list
* may only be traversed in the forward direction.
*
* A tail queue is headed by a pair of pointers, one to the head of the
* list and the other to the tail of the list. The elements are doubly
* linked so that an arbitrary element can be removed without a need to
* traverse the list. New elements can be added to the list before or
* after an existing element, at the head of the list, or at the end of
* the list. A tail queue may only be traversed in the forward direction.
*
* A circle queue is headed by a pair of pointers, one to the head of the
* list and the other to the tail of the list. The elements are doubly
* linked so that an arbitrary element can be removed without a need to
* traverse the list. New elements can be added to the list before or after
* an existing element, at the head of the list, or at the end of the list.
* A circle queue may be traversed in either direction, but has a more
* complex end of list detection.
*
* For details on the use of these macros, see the queue(3) manual page.
*/
/*
* List definitions.
*/
/*
 * LIST_HEAD declares "struct name", the list head: a single pointer to
 * the first element (NULL when the list is empty).
 */
#define LIST_HEAD(name, type) \
struct name { \
	struct type *lh_first; /* first element */ \
}

/*
 * LIST_ENTRY is the linkage member embedded in every element.  le_prev
 * holds the address of the pointer that points at this element (either
 * the head's lh_first or the previous element's le_next), which is what
 * allows removal without walking the list.
 */
#define LIST_ENTRY(type) \
struct { \
	struct type *le_next; /* next element */ \
	struct type **le_prev; /* address of previous next element */ \
}

#define LIST_FIRST(head) ((head)->lh_first)
#define LIST_NEXT(elm, field) ((elm)->field.le_next)
#define LIST_END(head) NULL

/*
 * List functions.
 *
 * Every multi-statement macro is wrapped in do { ... } while (0) so that
 * "MACRO(args);" is exactly one statement and can be used as the body of
 * an unbraced if/else.  Arguments are evaluated more than once; do not
 * pass expressions with side effects.
 */

/* Make the list empty. */
#define LIST_INIT(head) do { \
	(head)->lh_first = NULL; \
} while (0)

/* Link elm into the list immediately after listelm. */
#define LIST_INSERT_AFTER(listelm, elm, field) do { \
	if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
		(listelm)->field.le_next->field.le_prev = \
		    &(elm)->field.le_next; \
	(listelm)->field.le_next = (elm); \
	(elm)->field.le_prev = &(listelm)->field.le_next; \
} while (0)

/* Link elm into the list immediately before listelm. */
#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
	(elm)->field.le_prev = (listelm)->field.le_prev; \
	(elm)->field.le_next = (listelm); \
	*(listelm)->field.le_prev = (elm); \
	(listelm)->field.le_prev = &(elm)->field.le_next; \
} while (0)

/* Link elm in as the new first element. */
#define LIST_INSERT_HEAD(head, elm, field) do { \
	if (((elm)->field.le_next = (head)->lh_first) != NULL) \
		(head)->lh_first->field.le_prev = &(elm)->field.le_next; \
	(head)->lh_first = (elm); \
	(elm)->field.le_prev = &(head)->lh_first; \
} while (0)

/* Unlink elm; no head argument is needed because of le_prev. */
#define LIST_REMOVE(elm, field) do { \
	if ((elm)->field.le_next != NULL) \
		(elm)->field.le_next->field.le_prev = \
		    (elm)->field.le_prev; \
	*(elm)->field.le_prev = (elm)->field.le_next; \
} while (0)
/*
* Tail queue definitions.
*/
/*
 * TAILQ_HEAD declares "struct name": a pointer to the first element and
 * the address of the last element's next pointer (or of tqh_first itself
 * while the queue is empty).
 */
#define TAILQ_HEAD(name, type) \
struct name { \
	struct type *tqh_first; /* first element */ \
	struct type **tqh_last; /* addr of last next element */ \
}

/* Linkage member embedded in every element of a tail queue. */
#define TAILQ_ENTRY(type) \
struct { \
	struct type *tqe_next; /* next element */ \
	struct type **tqe_prev; /* address of previous next element */ \
}

#define TAILQ_FIRST(head) ((head)->tqh_first)
#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
#define TAILQ_END(head) NULL

/*
 * Tail queue functions.
 */

/* Empty queue: no first element, tail pointer aimed back at the head. */
#define TAILQ_INIT(head) do { \
	(head)->tqh_last = &(head)->tqh_first; \
	(head)->tqh_first = NULL; \
} while (0)

/* Push elm at the front; it becomes the tail too if the queue was empty. */
#define TAILQ_INSERT_HEAD(head, elm, field) do { \
	(elm)->field.tqe_next = (head)->tqh_first; \
	if ((elm)->field.tqe_next == NULL) \
		(head)->tqh_last = &(elm)->field.tqe_next; \
	else \
		(head)->tqh_first->field.tqe_prev = \
		    &(elm)->field.tqe_next; \
	(elm)->field.tqe_prev = &(head)->tqh_first; \
	(head)->tqh_first = (elm); \
} while (0)

/* Append elm; the old tail link is written before tqh_last moves. */
#define TAILQ_INSERT_TAIL(head, elm, field) do { \
	(elm)->field.tqe_prev = (head)->tqh_last; \
	(elm)->field.tqe_next = NULL; \
	*(head)->tqh_last = (elm); \
	(head)->tqh_last = &(elm)->field.tqe_next; \
} while (0)

/* Insert elm right after listelm, fixing the tail if listelm was last. */
#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
	(elm)->field.tqe_next = (listelm)->field.tqe_next; \
	if ((elm)->field.tqe_next == NULL) \
		(head)->tqh_last = &(elm)->field.tqe_next; \
	else \
		(elm)->field.tqe_next->field.tqe_prev = \
		    &(elm)->field.tqe_next; \
	(elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
	(listelm)->field.tqe_next = (elm); \
} while (0)

/* Insert elm right before listelm. */
#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
	(elm)->field.tqe_next = (listelm); \
	(elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
	*(listelm)->field.tqe_prev = (elm); \
	(listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
} while (0)

/* Unlink elm, pulling the tail pointer back if elm was the last element. */
#define TAILQ_REMOVE(head, elm, field) do { \
	if (((elm)->field.tqe_next) == NULL) \
		(head)->tqh_last = (elm)->field.tqe_prev; \
	else \
		(elm)->field.tqe_next->field.tqe_prev = \
		    (elm)->field.tqe_prev; \
	*(elm)->field.tqe_prev = (elm)->field.tqe_next; \
} while (0)
/*
* Circular queue definitions.
*/
/*
 * CIRCLEQ_HEAD declares "struct name" with pointers to the first and last
 * elements.  The head itself, cast to void *, is the end-of-queue marker:
 * an empty queue has both pointers aimed at the head.
 */
#define CIRCLEQ_HEAD(name, type) \
struct name { \
	struct type *cqh_first; /* first element */ \
	struct type *cqh_last; /* last element */ \
}

/* Linkage member embedded in every element of a circular queue. */
#define CIRCLEQ_ENTRY(type) \
struct { \
	struct type *cqe_next; /* next element */ \
	struct type *cqe_prev; /* previous element */ \
}

#define CIRCLEQ_FIRST(head) ((head)->cqh_first)
#define CIRCLEQ_LAST(head) ((head)->cqh_last)
#define CIRCLEQ_END(head) ((void *)(head))
#define CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next)
#define CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev)

/*
 * Circular queue functions.
 */

/* Empty queue: both ends refer back to the head. */
#define CIRCLEQ_INIT(head) do { \
	(head)->cqh_last = (void *)(head); \
	(head)->cqh_first = (void *)(head); \
} while (0)

/* Insert elm after listelm; elm becomes the last element if listelm was. */
#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
	(elm)->field.cqe_prev = (listelm); \
	(elm)->field.cqe_next = (listelm)->field.cqe_next; \
	if ((listelm)->field.cqe_next != (void *)(head)) \
		(listelm)->field.cqe_next->field.cqe_prev = (elm); \
	else \
		(head)->cqh_last = (elm); \
	(listelm)->field.cqe_next = (elm); \
} while (0)

/* Insert elm before listelm; elm becomes the first element if listelm was. */
#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \
	(elm)->field.cqe_prev = (listelm)->field.cqe_prev; \
	(elm)->field.cqe_next = (listelm); \
	if ((listelm)->field.cqe_prev != (void *)(head)) \
		(listelm)->field.cqe_prev->field.cqe_next = (elm); \
	else \
		(head)->cqh_first = (elm); \
	(listelm)->field.cqe_prev = (elm); \
} while (0)

/* Insert elm at the front of the queue. */
#define CIRCLEQ_INSERT_HEAD(head, elm, field) do { \
	(elm)->field.cqe_prev = (void *)(head); \
	(elm)->field.cqe_next = (head)->cqh_first; \
	if ((head)->cqh_last != (void *)(head)) \
		(head)->cqh_first->field.cqe_prev = (elm); \
	else \
		(head)->cqh_last = (elm); \
	(head)->cqh_first = (elm); \
} while (0)

/* Insert elm at the back of the queue. */
#define CIRCLEQ_INSERT_TAIL(head, elm, field) do { \
	(elm)->field.cqe_prev = (head)->cqh_last; \
	(elm)->field.cqe_next = (void *)(head); \
	if ((head)->cqh_first != (void *)(head)) \
		(head)->cqh_last->field.cqe_next = (elm); \
	else \
		(head)->cqh_first = (elm); \
	(head)->cqh_last = (elm); \
} while (0)

/* Unlink elm, patching whichever side (neighbour or head) referred to it. */
#define CIRCLEQ_REMOVE(head, elm, field) do { \
	if ((elm)->field.cqe_next != (void *)(head)) \
		(elm)->field.cqe_next->field.cqe_prev = \
		    (elm)->field.cqe_prev; \
	else \
		(head)->cqh_last = (elm)->field.cqe_prev; \
	if ((elm)->field.cqe_prev != (void *)(head)) \
		(elm)->field.cqe_prev->field.cqe_next = \
		    (elm)->field.cqe_next; \
	else \
		(head)->cqh_first = (elm)->field.cqe_next; \
} while (0)
#endif /* !_SYS_QUEUE_H_ */

361
db2/include/shqueue.h Normal file
View File

@ -0,0 +1,361 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)shqueue.h 8.11 (Sleepycat) 7/27/97
*/
#ifndef _SYS_SHQUEUE_H_
#define _SYS_SHQUEUE_H_
/*
* This file defines three types of data structures: lists, tail queues, and
* circular queues, similarly to the include file <sys/queue.h>.
*
* The difference is that this set of macros can be used for structures that
* reside in shared memory that may be mapped at different addresses in each
* process. In most cases, the macros for shared structures exactly mirror
* the normal macros, although the macro calls require an additional type
* parameter, only used by the HEAD and ENTRY macros of the standard macros.
*
* For details on the use of these macros, see the queue(3) manual page.
*/
/*
* Shared list definitions.
*/
/* Head of a shared list: offset from the head to the first element. */
#define SH_LIST_HEAD(name) \
struct name { \
	ssize_t slh_first; /* first element */ \
}

/* Linkage member embedded in every element of a shared list. */
#define SH_LIST_ENTRY \
struct { \
	ssize_t sle_next; /* relative offset next element */ \
	ssize_t sle_prev; /* relative offset of prev element */ \
}

/*
 * Shared list functions.
 *
 * Links are byte offsets relative to the structure that holds them, so 0
 * is a legitimate value and -1 is used as the end-of-list marker instead.
 * Macros whose name ends in "P" convert an offset to a pointer without
 * looking for the -1 marker; the others test for it and yield either a
 * pointer or NULL.
 */
#define SH_LIST_FIRSTP(head, type) \
	((struct type *)(((u_int8_t *)(head)) + (head)->slh_first))

#define SH_LIST_FIRST(head, type) \
	((head)->slh_first == -1 ? NULL : \
	    ((struct type *)(((u_int8_t *)(head)) + (head)->slh_first)))

#define SH_LIST_NEXTP(elm, field, type) \
	((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next))

#define SH_LIST_NEXT(elm, field, type) \
	((elm)->field.sle_next == -1 ? NULL : \
	    ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next)))

/* Address of the ssize_t link (in the head or previous element) that refers to elm. */
#define SH_LIST_PREV(elm, field) \
	((ssize_t *)(((u_int8_t *)(elm)) + (elm)->field.sle_prev))

/* Byte distance from src to dest; used by all the shared queue macros. */
#define SH_PTR_TO_OFF(src, dest) \
	((ssize_t)(((u_int8_t *)(dest)) - ((u_int8_t *)(src))))

#define SH_LIST_END(head) NULL

/*
 * Given an element whose sle_next is already set, compute the sle_prev
 * value its successor must hold: the negated next offset (successor back
 * to elm) plus the offset of the sle_next member inside elm.
 */
#define SH_LIST_NEXT_TO_PREV(elm, field) \
	(-(elm)->field.sle_next + SH_PTR_TO_OFF(elm, &(elm)->field.sle_next))

#define SH_LIST_INIT(head) (head)->slh_first = -1

/* Link elm into the list immediately after listelm. */
#define SH_LIST_INSERT_AFTER(listelm, elm, field, type) do { \
	if ((listelm)->field.sle_next == -1) \
		(elm)->field.sle_next = -1; \
	else { \
		(elm)->field.sle_next = SH_PTR_TO_OFF(elm, \
		    SH_LIST_NEXTP(listelm, field, type)); \
		SH_LIST_NEXTP(listelm, field, type)->field.sle_prev = \
		    SH_LIST_NEXT_TO_PREV(elm, field); \
	} \
	(listelm)->field.sle_next = SH_PTR_TO_OFF(listelm, elm); \
	(elm)->field.sle_prev = SH_LIST_NEXT_TO_PREV(listelm, field); \
} while (0)

/* Link elm in as the new first element. */
#define SH_LIST_INSERT_HEAD(head, elm, field, type) do { \
	if ((head)->slh_first == -1) \
		(elm)->field.sle_next = -1; \
	else { \
		(elm)->field.sle_next = \
		    (head)->slh_first - SH_PTR_TO_OFF(head, elm); \
		SH_LIST_FIRSTP(head, type)->field.sle_prev = \
		    SH_LIST_NEXT_TO_PREV(elm, field); \
	} \
	(head)->slh_first = SH_PTR_TO_OFF(head, elm); \
	(elm)->field.sle_prev = SH_PTR_TO_OFF(elm, &(head)->slh_first); \
} while (0)

/* Unlink elm; the link that referred to elm is retargeted or set to -1. */
#define SH_LIST_REMOVE(elm, field, type) do { \
	if ((elm)->field.sle_next == -1) \
		*SH_LIST_PREV(elm, field) = -1; \
	else { \
		SH_LIST_NEXTP(elm, field, type)->field.sle_prev = \
		    (elm)->field.sle_prev - (elm)->field.sle_next; \
		*SH_LIST_PREV(elm, field) += (elm)->field.sle_next; \
	} \
} while (0)
/*
* Shared tail queue definitions.
*/
/*
 * Head of a shared tail queue.  stqh_first is the offset from the head to
 * the first element (-1 when empty); stqh_last is the offset from the head
 * to the last element's stqe_next member, or to stqh_first itself while
 * the queue is empty.
 */
#define SH_TAILQ_HEAD(name) \
struct name { \
	ssize_t stqh_first; /* relative offset of first element */ \
	ssize_t stqh_last; /* relative offset of last's next */ \
}

/* Linkage member embedded in every element of a shared tail queue. */
#define SH_TAILQ_ENTRY \
struct { \
	ssize_t stqe_next; /* relative offset of next element */ \
	ssize_t stqe_prev; /* relative offset of prev's next */ \
}

/*
 * Shared tail queue functions.
 *
 * As with the shared lists, -1 marks "no element" and the "P" variants
 * convert an offset to a pointer without checking for it.  Every macro
 * argument is parenthesized on use, so arbitrary pointer expressions
 * (e.g. "&node") may be passed; arguments are evaluated more than once.
 * These macros rely on SH_PTR_TO_OFF, defined with the shared list macros.
 */
#define SH_TAILQ_FIRSTP(head, type) \
	((struct type *)((u_int8_t *)(head) + (head)->stqh_first))

#define SH_TAILQ_FIRST(head, type) \
	((head)->stqh_first == -1 ? NULL : SH_TAILQ_FIRSTP(head, type))

#define SH_TAILQ_NEXTP(elm, field, type) \
	((struct type *)((u_int8_t *)(elm) + (elm)->field.stqe_next))

#define SH_TAILQ_NEXT(elm, field, type) \
	((elm)->field.stqe_next == -1 ? NULL : SH_TAILQ_NEXTP(elm, field, type))

/* Address of the ssize_t link (head or previous element) that refers to elm. */
#define SH_TAILQ_PREVP(elm, field) \
	((ssize_t *)((u_int8_t *)(elm) + (elm)->field.stqe_prev))

/* Address of the link that a newly appended element must be stored in. */
#define SH_TAILQ_LAST(head) \
	((ssize_t *)(((u_int8_t *)(head)) + (head)->stqh_last))

/* stqe_prev value for the successor of elm, given elm's stqe_next. */
#define SH_TAILQ_NEXT_TO_PREV(elm, field) \
	(-(elm)->field.stqe_next + SH_PTR_TO_OFF(elm, &(elm)->field.stqe_next))

#define SH_TAILQ_END(head) NULL

/*
 * Empty the queue.  Wrapped in do/while (0), like the other statement
 * macros, so it is a single statement inside an unbraced if/else.
 */
#define SH_TAILQ_INIT(head) do { \
	(head)->stqh_first = -1; \
	(head)->stqh_last = SH_PTR_TO_OFF(head, &(head)->stqh_first); \
} while (0)

/* Push elm at the front; it also becomes the tail if the queue was empty. */
#define SH_TAILQ_INSERT_HEAD(head, elm, field, type) do { \
	if ((head)->stqh_first != -1) { \
		(elm)->field.stqe_next = \
		    (head)->stqh_first - SH_PTR_TO_OFF(head, elm); \
		SH_TAILQ_FIRSTP(head, type)->field.stqe_prev = \
		    SH_TAILQ_NEXT_TO_PREV(elm, field); \
	} else { \
		(elm)->field.stqe_next = -1; \
		(head)->stqh_last = \
		    SH_PTR_TO_OFF(head, &(elm)->field.stqe_next); \
	} \
	(head)->stqh_first = SH_PTR_TO_OFF(head, elm); \
	(elm)->field.stqe_prev = \
	    SH_PTR_TO_OFF(elm, &(head)->stqh_first); \
} while (0)

/*
 * Append elm.  The queue is empty exactly when stqh_last still refers to
 * stqh_first; otherwise the old last element's next link is rewritten to
 * the offset from that element to elm.
 */
#define SH_TAILQ_INSERT_TAIL(head, elm, field) do { \
	(elm)->field.stqe_next = -1; \
	(elm)->field.stqe_prev = \
	    -SH_PTR_TO_OFF(head, elm) + (head)->stqh_last; \
	if ((head)->stqh_last == \
	    SH_PTR_TO_OFF((head), &(head)->stqh_first)) \
		(head)->stqh_first = SH_PTR_TO_OFF(head, elm); \
	else \
		*SH_TAILQ_LAST(head) = -(head)->stqh_last + \
		    SH_PTR_TO_OFF((elm), &(elm)->field.stqe_next) + \
		    SH_PTR_TO_OFF(head, elm); \
	(head)->stqh_last = \
	    SH_PTR_TO_OFF(head, &((elm)->field.stqe_next)); \
} while (0)

/* Insert elm right after listelm, fixing the tail if listelm was last. */
#define SH_TAILQ_INSERT_AFTER(head, listelm, elm, field, type) do { \
	if ((listelm)->field.stqe_next != -1) { \
		(elm)->field.stqe_next = (listelm)->field.stqe_next - \
		    SH_PTR_TO_OFF(listelm, elm); \
		SH_TAILQ_NEXTP(listelm, field, type)->field.stqe_prev = \
		    SH_TAILQ_NEXT_TO_PREV(elm, field); \
	} else { \
		(elm)->field.stqe_next = -1; \
		(head)->stqh_last = \
		    SH_PTR_TO_OFF(head, &(elm)->field.stqe_next); \
	} \
	(listelm)->field.stqe_next = SH_PTR_TO_OFF(listelm, elm); \
	(elm)->field.stqe_prev = SH_TAILQ_NEXT_TO_PREV(listelm, field); \
} while (0)

/* Unlink elm, pulling the tail back if elm was the last element. */
#define SH_TAILQ_REMOVE(head, elm, field, type) do { \
	if ((elm)->field.stqe_next != -1) { \
		SH_TAILQ_NEXTP(elm, field, type)->field.stqe_prev = \
		    (elm)->field.stqe_prev + \
		    SH_PTR_TO_OFF(SH_TAILQ_NEXTP(elm, \
		    field, type), elm); \
		*SH_TAILQ_PREVP(elm, field) += (elm)->field.stqe_next; \
	} else { \
		(head)->stqh_last = (elm)->field.stqe_prev + \
		    SH_PTR_TO_OFF(head, elm); \
		*SH_TAILQ_PREVP(elm, field) = -1; \
	} \
} while (0)
/*
* Shared circular queue definitions.
*/
/*
 * Head of a shared circular queue: offsets from the head to the first and
 * last elements.  Both are 0 (i.e. they refer to the head itself) while
 * the queue is empty.
 */
#define SH_CIRCLEQ_HEAD(name) \
struct name { \
	ssize_t scqh_first; /* first element */ \
	ssize_t scqh_last; /* last element */ \
}

/*
 * Linkage member embedded in every element: offsets from the element to
 * its neighbours, or to the head at either end of the queue.
 */
#define SH_CIRCLEQ_ENTRY \
struct { \
	ssize_t scqe_next; /* next element */ \
	ssize_t scqe_prev; /* previous element */ \
}

/*
 * Shared circular queue functions.
 *
 * The head, cast to void *, is the end-of-queue marker.  The "P" variants
 * convert an offset to a pointer without checking for the head.  Every
 * macro argument is parenthesized on use; arguments are evaluated more
 * than once.  These macros rely on SH_PTR_TO_OFF, defined with the shared
 * list macros.
 */
#define SH_CIRCLEQ_FIRSTP(head, type) \
	((struct type *)(((u_int8_t *)(head)) + (head)->scqh_first))

/*
 * NOTE(review): SH_CIRCLEQ_INIT stores 0, not -1, so the -1 tests in
 * FIRST and LAST never fire for a queue set up by these macros.  The
 * result is still the head for an empty queue because head + 0 is the
 * head.  The tests are kept so existing behaviour is unchanged.
 */
#define SH_CIRCLEQ_FIRST(head, type) \
	((head)->scqh_first == -1 ? \
	    (void *)(head) : SH_CIRCLEQ_FIRSTP(head, type))

#define SH_CIRCLEQ_LASTP(head, type) \
	((struct type *)(((u_int8_t *)(head)) + (head)->scqh_last))

#define SH_CIRCLEQ_LAST(head, type) \
	((head)->scqh_last == -1 ? \
	    (void *)(head) : SH_CIRCLEQ_LASTP(head, type))

#define SH_CIRCLEQ_NEXTP(elm, field, type) \
	((struct type *)(((u_int8_t *)(elm)) + (elm)->field.scqe_next))

#define SH_CIRCLEQ_NEXT(head, elm, field, type) \
	((elm)->field.scqe_next == SH_PTR_TO_OFF(elm, head) ? \
	    (void *)(head) : SH_CIRCLEQ_NEXTP(elm, field, type))

#define SH_CIRCLEQ_PREVP(elm, field, type) \
	((struct type *)(((u_int8_t *)(elm)) + (elm)->field.scqe_prev))

#define SH_CIRCLEQ_PREV(head, elm, field, type) \
	((elm)->field.scqe_prev == SH_PTR_TO_OFF(elm, head) ? \
	    (void *)(head) : SH_CIRCLEQ_PREVP(elm, field, type))

#define SH_CIRCLEQ_END(head) ((void *)(head))

/*
 * Empty the queue.  Wrapped in do/while (0), like the other statement
 * macros, so it is a single statement inside an unbraced if/else.
 */
#define SH_CIRCLEQ_INIT(head) do { \
	(head)->scqh_first = 0; \
	(head)->scqh_last = 0; \
} while (0)

/* Insert elm after listelm; elm becomes the last element if listelm was. */
#define SH_CIRCLEQ_INSERT_AFTER(head, listelm, elm, field, type) do { \
	(elm)->field.scqe_prev = SH_PTR_TO_OFF(elm, listelm); \
	(elm)->field.scqe_next = (listelm)->field.scqe_next + \
	    (elm)->field.scqe_prev; \
	if (SH_CIRCLEQ_NEXTP(listelm, field, type) == (void *)(head)) \
		(head)->scqh_last = SH_PTR_TO_OFF(head, elm); \
	else \
		SH_CIRCLEQ_NEXTP(listelm, \
		    field, type)->field.scqe_prev = \
		    SH_PTR_TO_OFF(SH_CIRCLEQ_NEXTP(listelm, \
		    field, type), elm); \
	(listelm)->field.scqe_next = -(elm)->field.scqe_prev; \
} while (0)

/*
 * Insert elm before listelm; elm becomes the first element if listelm was.
 *
 * elm's prev offset is derived from listelm's own prev offset
 * (predecessor - listelm) plus (listelm - elm).  It must not be read
 * through the predecessor: when listelm is the first element the
 * predecessor is the head, which has no "field" member at that offset.
 */
#define SH_CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field, type) do { \
	(elm)->field.scqe_next = SH_PTR_TO_OFF(elm, listelm); \
	(elm)->field.scqe_prev = (listelm)->field.scqe_prev + \
	    (elm)->field.scqe_next; \
	if (SH_CIRCLEQ_PREVP(listelm, field, type) == (void *)(head)) \
		(head)->scqh_first = SH_PTR_TO_OFF(head, elm); \
	else \
		SH_CIRCLEQ_PREVP(listelm, \
		    field, type)->field.scqe_next = \
		    SH_PTR_TO_OFF(SH_CIRCLEQ_PREVP(listelm, \
		    field, type), elm); \
	(listelm)->field.scqe_prev = -(elm)->field.scqe_next; \
} while (0)

/* Insert elm at the front of the queue. */
#define SH_CIRCLEQ_INSERT_HEAD(head, elm, field, type) do { \
	(elm)->field.scqe_prev = SH_PTR_TO_OFF(elm, head); \
	(elm)->field.scqe_next = (head)->scqh_first + \
	    (elm)->field.scqe_prev; \
	if ((head)->scqh_last == 0) \
		(head)->scqh_last = -(elm)->field.scqe_prev; \
	else \
		SH_CIRCLEQ_FIRSTP(head, type)->field.scqe_prev = \
		    SH_PTR_TO_OFF(SH_CIRCLEQ_FIRSTP(head, type), elm); \
	(head)->scqh_first = -(elm)->field.scqe_prev; \
} while (0)

/* Insert elm at the back of the queue. */
#define SH_CIRCLEQ_INSERT_TAIL(head, elm, field, type) do { \
	(elm)->field.scqe_next = SH_PTR_TO_OFF(elm, head); \
	(elm)->field.scqe_prev = (head)->scqh_last + \
	    (elm)->field.scqe_next; \
	if ((head)->scqh_first == 0) \
		(head)->scqh_first = -(elm)->field.scqe_next; \
	else \
		SH_CIRCLEQ_LASTP(head, type)->field.scqe_next = \
		    SH_PTR_TO_OFF(SH_CIRCLEQ_LASTP(head, type), elm); \
	(head)->scqh_last = -(elm)->field.scqe_next; \
} while (0)

/* Unlink elm, patching whichever side (neighbour or head) referred to it. */
#define SH_CIRCLEQ_REMOVE(head, elm, field, type) do { \
	if (SH_CIRCLEQ_NEXTP(elm, field, type) == (void *)(head)) \
		(head)->scqh_last += (elm)->field.scqe_prev; \
	else \
		SH_CIRCLEQ_NEXTP(elm, field, type)->field.scqe_prev += \
		    (elm)->field.scqe_prev; \
	if (SH_CIRCLEQ_PREVP(elm, field, type) == (void *)(head)) \
		(head)->scqh_first += (elm)->field.scqe_next; \
	else \
		SH_CIRCLEQ_PREVP(elm, field, type)->field.scqe_next += \
		    (elm)->field.scqe_next; \
} while (0)
#endif /* !_SYS_SHQUEUE_H_ */

112
db2/include/txn.h Normal file
View File

@ -0,0 +1,112 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*
* @(#)txn.h 10.6 (Sleepycat) 7/29/97
*/
#ifndef _TXN_H_
#define _TXN_H_
/*
* The name of the transaction shared memory region is DEFAULT_TXN_FILE and
* the region is always created group RW of the group owning the directory.
*/
#define DEFAULT_TXN_FILE "__db_txn.share"
/*
 * Transaction id range: the first id handed out is TXN_MINIMUM (high bit
 * set) and TXN_INVALID is the all-ones value at the top of the range.
 * NOTE(review): presumably the high bit keeps transaction ids disjoint
 * from other locker ids -- confirm against the lock/txn code.
 */
#define TXN_INVALID 0xffffffff /* Maximum number of txn ids. */
#define TXN_MINIMUM 0x80000000 /* First transaction id */
/*
* Transaction type declarations.
*/
/*
* Internal data maintained in shared memory for each transaction.
*/
/*
 * One slot of the table[] array in struct __db_txnregion.  The TXN_* values
 * defined inline below are the values stored in "status".
 */
typedef struct __txn_detail {
u_int32_t txnid; /* current transaction id
used to link free list also */
DB_LSN last_lsn; /* last lsn written for this txn */
DB_LSN begin_lsn; /* lsn of begin record */
size_t last_lock; /* offset in lock region of last lock
for this transaction. */
#define TXN_UNALLOC 0
#define TXN_RUNNING 1
#define TXN_ABORTED 2
#define TXN_PREPARED 3
u_int32_t status; /* status of the transaction */
} TXN_DETAIL;
/*
* The transaction manager encapsulates the transaction system. It contains
* references to the log and lock managers as well as the state that keeps
* track of the shared memory region.
*/
/*
 * Transaction manager handle.  "mutex" is the lock taken by
 * LOCK_TXNTHREAD/UNLOCK_TXNTHREAD when DB_THREAD is set in "flags";
 * "region" and "fd" are what LOCK_TXNREGION/UNLOCK_TXNREGION operate on.
 */
struct __db_txnmgr {
/* These fields need to be protected for multi-threaded support. */
db_mutex_t mutex; /* Synchronization. */
/* list of active transactions */
/* (an ordinary pointer-based TAILQ of struct __db_txn, not a shared-memory SH_TAILQ) */
TAILQ_HEAD(_chain, __db_txn) txn_chain;
/* These fields are not protected. */
DB_ENV *dbenv; /* Environment. */
int (*recover) /* Recovery dispatch routine */
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int fd; /* mapped file descriptor */
u_int flags; /* DB_TXN_NOSYNC, DB_THREAD */
size_t reg_size; /* how large we think the region is */
DB_TXNREGION *region; /* address of shared memory region */
};
/*
* Layout of the shared memory region.
*
*/
/*
 * Fixed header of the transaction region followed by the table of
 * TXN_DETAIL slots.  table[] is declared with one element but is the
 * start of a longer array (pre-C99 "struct hack"); see TXN_REGION_SIZE
 * for how the region is sized.
 * NOTE(review): free_txn is presumably an index into table[] chained
 * through TXN_DETAIL.txnid (see that field's comment) -- confirm.
 */
struct __db_txnregion {
RLAYOUT hdr; /* Shared memory region header. */
u_int32_t magic; /* transaction magic number */
u_int32_t version; /* version number */
u_int32_t maxtxns; /* maximum number of active txns */
u_int32_t last_txnid; /* last transaction id given out */
u_int32_t free_txn; /* head of transaction free list */
DB_LSN pending_ckp; /* last checkpoint did not finish */
DB_LSN last_ckp; /* lsn of the last checkpoint */
time_t time_ckp; /* time of last checkpoint */
u_int32_t logtype; /* type of logging */
u_int32_t locktype; /* lock type */
u_int32_t naborts; /* number of aborted transactions */
u_int32_t ncommits; /* number of committed transactions */
u_int32_t nbegins; /* number of begun transactions */
TXN_DETAIL table[1]; /* array of TXN structures */
};
/*
 * TXN_REGION_SIZE --
 *	Bytes needed for a transaction region with room for N transactions.
 *	N is parenthesized so that an expression argument such as
 *	TXN_REGION_SIZE(a + b) is multiplied as a whole.
 *
 * NOTE(review): the per-slot size is sizeof(DB_TXN) although table[] in
 * struct __db_txnregion holds TXN_DETAIL entries.  Left unchanged here
 * because it determines the size of existing region files -- confirm
 * which type is intended before altering it.
 */
#define TXN_REGION_SIZE(N) \
	(sizeof(DB_TXNREGION) + (N) * sizeof(DB_TXN))
/* Macros to lock/unlock the region and threads. */
/*
 * LOCK_TXNTHREAD / UNLOCK_TXNTHREAD --
 *	Acquire/release the manager's own mutex, but only when the manager
 *	was opened with DB_THREAD.  The mutex calls are made with -1 in the
 *	file-descriptor argument position.
 *
 *	The conditional is wrapped in do { ... } while (0) so the macro is a
 *	complete statement: a bare "if" here would capture the "else" of a
 *	caller's surrounding if/else.
 */
#define LOCK_TXNTHREAD(tmgrp) do { \
	if (F_ISSET(tmgrp, DB_THREAD)) \
		(void)__db_mutex_lock(&(tmgrp)->mutex, -1, \
		    (tmgrp)->dbenv == NULL ? NULL : (tmgrp)->dbenv->db_yield); \
} while (0)

#define UNLOCK_TXNTHREAD(tmgrp) do { \
	if (F_ISSET(tmgrp, DB_THREAD)) \
		(void)__db_mutex_unlock(&(tmgrp)->mutex, -1); \
} while (0)

/*
 * LOCK_TXNREGION / UNLOCK_TXNREGION --
 *	Acquire/release the lock in the shared region header, passing the
 *	manager's descriptor (tmgrp)->fd to the mutex call.  The yield
 *	function is taken from the environment when there is one.
 */
#define LOCK_TXNREGION(tmgrp) \
	(void)__db_mutex_lock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd, \
	    (tmgrp)->dbenv == NULL ? NULL : (tmgrp)->dbenv->db_yield)

#define UNLOCK_TXNREGION(tmgrp) \
	(void)__db_mutex_unlock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd)
/*
* Log record types.
*/
#define TXN_BEGIN 1
#define TXN_COMMIT 2
#define TXN_PREPARE 3
#define TXN_CHECKPOINT 4
#include "txn_auto.h"
#include "txn_ext.h"
#endif /* !_TXN_H_ */

25
db2/include/txn_auto.h Normal file
View File

@ -0,0 +1,25 @@
/* Do not edit: automatically built by dist/db_gen.sh. */
#ifndef txn_AUTO_H
#define txn_AUTO_H
/*
 * Record type code (relative to DB_txn_BEGIN) and argument structure
 * filled in by __txn_regop_read for a "txn_regop" record.
 * NOTE(review): this header is generated by dist/db_gen.sh; comments
 * added by hand will be lost unless the generator emits them too.
 */
#define DB_txn_regop (DB_txn_BEGIN + 1)
typedef struct _txn_regop_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
u_int32_t opcode;
} __txn_regop_args;
/*
 * Record type code (relative to DB_txn_BEGIN) and argument structure
 * filled in by __txn_ckp_read for a "txn_ckp" record.
 * NOTE(review): this header is generated by dist/db_gen.sh; comments
 * added by hand will be lost unless the generator emits them too.
 */
#define DB_txn_ckp (DB_txn_BEGIN + 2)
typedef struct _txn_ckp_args {
u_int32_t type;
DB_TXN *txnid;
DB_LSN prev_lsn;
DB_LSN ckp_lsn;
DB_LSN last_ckp;
} __txn_ckp_args;
#endif

18
db2/include/txn_ext.h Normal file
View File

@ -0,0 +1,18 @@
/* Do not edit: automatically built by dist/distrib. */
int __txn_regop_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
u_int32_t));
int __txn_regop_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __txn_regop_read __P((void *, __txn_regop_args **));
int __txn_ckp_log
__P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
DB_LSN *, DB_LSN *));
int __txn_ckp_print
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __txn_ckp_read __P((void *, __txn_ckp_args **));
int __txn_init_print __P((DB_ENV *));
int __txn_init_recover __P((DB_ENV *));
int __txn_regop_recover
__P((DB_LOG *, DBT *, DB_LSN *, int, void *));
int __txn_ckp_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));

1362
db2/lock/lock.c Normal file

File diff suppressed because it is too large Load Diff

39
db2/lock/lock_conflict.c Normal file
View File

@ -0,0 +1,39 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)lock_conflict.c 10.2 (Sleepycat) 6/21/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#endif
#include "db_int.h"
/*
* The conflict arrays are set up such that the row is the lock you
* are holding and the column is the lock that is desired.
*/
/*
 * 3x3 conflict matrix stored row-major in a flat array: the row is the
 * mode held, the column is the mode requested, and 1 means the two
 * conflict.
 * NOTE(review): N/R/W presumably stand for no-lock/read/write -- confirm
 * against the lock mode enumeration.
 */
const u_int8_t db_rw_conflicts[] = {
/* N R W */
/* N */ 0, 0, 0,
/* R */ 0, 0, 1,
/* W */ 0, 1, 1
};
/*
 * 6x6 conflict matrix stored row-major in a flat array: the row is the
 * mode held, the column is the mode requested, and 1 means the two
 * conflict.
 * NOTE(review): three entries differ from the usual multi-granularity
 * locking matrix and should be confirmed as intentional:
 *   - row X, column N is 1, although column N is 0 in every other row;
 *   - IX/SIX and SIX/IX are 0 (normally a conflict);
 *   - SIX/SIX is 0 (normally a conflict).
 */
const u_int8_t db_riw_conflicts[] = {
/* N S X IS IX SIX */
/* N */ 0, 0, 0, 0, 0, 0,
/* S */ 0, 0, 1, 0, 1, 1,
/* X */ 1, 1, 1, 1, 1, 1,
/* IS */ 0, 0, 1, 0, 0, 0,
/* IX */ 0, 1, 1, 0, 0, 0,
/* SIX */ 0, 1, 1, 0, 0, 0
};

496
db2/lock/lock_deadlock.c Normal file
View File

@ -0,0 +1,496 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1997\n\
Sleepycat Software Inc. All rights reserved.\n";
static const char sccsid[] = "@(#)lock_deadlock.c 10.20 (Sleepycat) 8/21/97";
#endif
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#endif
#include "db_int.h"
#include "shqueue.h"
#include "db_shash.h"
#include "lock.h"
#include "common_ext.h"
/*
 * Bitmap helpers.  A map is an array of u_int32_t words; bit B lives in
 * word B / 32 at position B % 32.
 *
 * The single-bit mask is built from an unsigned 1: shifting a signed int
 * 1 left by 31 is undefined behaviour in C, and bit 31 of every word is a
 * legitimate bit here.
 *
 * CLEAR_MAP and OR_MAP take a count N of 32-bit WORDS, not bits.  They
 * expand to a brace-enclosed block (not do/while (0)), so they must not
 * be used as the unbraced body of an if that has an else.
 */
#define ISSET_MAP(M, N) ((M)[(N) / 32] & ((u_int32_t)1 << ((N) % 32)))

/* Zero the first N words of M. */
#define CLEAR_MAP(M, N) { \
	u_int32_t __i; \
	for (__i = 0; __i < (N); __i++) \
		(M)[__i] = 0; \
}

#define SET_MAP(M, B) ((M)[(B) / 32] |= ((u_int32_t)1 << ((B) % 32)))
#define CLR_MAP(M, B) ((M)[(B) / 32] &= ~((u_int32_t)1 << ((B) % 32)))

/* OR the first N words of S into D. */
#define OR_MAP(D, S, N) { \
	u_int32_t __i; \
	for (__i = 0; __i < (N); __i++) \
		(D)[__i] |= (S)[__i]; \
}

/* Sentinel stored in killid when no locker has been selected. */
#define BAD_KILLID 0xffffffff
/*
 * One entry of the idmap array built by __dd_build.  lock_detect indexes
 * the array by bit position in the waits-for bitmap and uses "id" both to
 * compare lockers (oldest/youngest) and in diagnostics.
 * NOTE(review): "valid" and "last_lock" are not used by lock_detect;
 * presumably they are consumed by __dd_abort -- confirm.
 */
typedef struct {
int valid;
u_int32_t id;
DB_LOCK last_lock;
} locker_info;
static int __dd_abort __P((DB_ENV *, locker_info *));
static int __dd_build __P((DB_ENV *, u_int32_t **, int *, locker_info **));
#ifdef DEBUG
static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, int));
#endif
static u_int32_t
*__dd_find __P((u_int32_t *, locker_info *, u_int32_t));
/*
 * lock_detect --
 *	Run one pass of the deadlock detector and, if a deadlock is found,
 *	abort one of the lockers involved.
 *
 *	lt	lock table to examine.
 *	flags	0, or DB_LOCK_CONFLICT to run only when the region's need_dd
 *		flag is set.
 *	atype	victim policy: DB_LOCK_DEFAULT / DB_LOCK_RANDOM take the
 *		locker whose bitmap row __dd_find returned; DB_LOCK_OLDEST
 *		takes the participant with the lowest id; DB_LOCK_YOUNGEST
 *		the one with the highest id.
 *
 *	Returns 0 when no pass was needed, there are no lockers, or no
 *	deadlock was found; otherwise the error from __db_fchk, __dd_build
 *	or __dd_abort, or EINVAL for an unknown atype.
 */
int
lock_detect(lt, flags, atype)
	DB_LOCKTAB *lt;
	int flags;
	u_int32_t atype;
{
	DB_ENV *dbenv;
	locker_info *idmap;
	u_int32_t *bitmap, *deadlock, killid;
	int do_pass, i, nlockers, nentries, ret;

	/* Validate arguments. */
	if ((ret =
	    __db_fchk(lt->dbenv, "lock_detect", flags, DB_LOCK_CONFLICT)) != 0)
		return (ret);

	/* Check if a detector run is necessary. */
	do_pass = 1;
	dbenv = lt->dbenv;
	if (LF_ISSET(DB_LOCK_CONFLICT)) {
		/* Make a pass every time a lock waits. */
		LOCK_LOCKREGION(lt);
		do_pass = dbenv->lk_info->region->need_dd != 0;
		UNLOCK_LOCKREGION(lt);
	}

	if (!do_pass)
		return (0);

	/* Build the waits-for bitmap. */
	if ((ret = __dd_build(dbenv, &bitmap, &nlockers, &idmap)) != 0)
		return (ret);

	/*
	 * NOTE(review): this return does not free bitmap/idmap; that is only
	 * correct if __dd_build allocates nothing when it reports zero
	 * lockers -- confirm against __dd_build.
	 */
	if (nlockers == 0)
		return (0);
#ifdef DEBUG
	if (dbenv->db_verbose != 0)
		__dd_debug(dbenv, idmap, bitmap, nlockers);
#endif
	/* Find a deadlock. */
	deadlock = __dd_find(bitmap, idmap, nlockers);

	/* Number of 32-bit words in one locker's row of the bitmap. */
	nentries = ALIGN(nlockers, 32) / 32;
	killid = BAD_KILLID;
	if (deadlock != NULL) {
		/* Kill someone. */
		switch (atype) {
		case DB_LOCK_OLDEST:
			/*
			 * Find the first bit set in the current
			 * array and then look for a lower tid in
			 * the array.  The break is essential: without
			 * it the loop ends on the LAST set bit and the
			 * scan below has nothing left to compare.
			 */
			for (i = 0; i < nlockers; i++)
				if (ISSET_MAP(deadlock, i)) {
					killid = i;
					break;
				}

			if (killid == BAD_KILLID) {
				__db_err(dbenv,
				    "warning: could not find %s",
				    "locker to abort");
				break;
			}

			/*
			 * The oldest transaction has the lowest
			 * transaction id.
			 */
			for (i = killid + 1; i < nlockers; i++)
				if (ISSET_MAP(deadlock, i) &&
				    idmap[i].id < idmap[killid].id)
					killid = i;
			break;
		case DB_LOCK_DEFAULT:
		case DB_LOCK_RANDOM:
			/*
			 * We are trying to calculate the id of the
			 * locker whose entry is indicated by deadlock.
			 * We know that this is less than nlockers, so
			 * the cast below is valid.
			 */
			killid =
			    (u_int32_t)((deadlock - bitmap) / nentries);
			break;
		case DB_LOCK_YOUNGEST:
			/*
			 * Find the first bit set in the current
			 * array and then look for a higher tid in
			 * the array.  As above, stop at the first
			 * set bit.
			 */
			for (i = 0; i < nlockers; i++)
				if (ISSET_MAP(deadlock, i)) {
					killid = i;
					break;
				}

			if (killid == BAD_KILLID) {
				__db_err(dbenv,
				    "warning: could not find %s",
				    "locker to abort");
				break;
			}

			/*
			 * The youngest transaction has the highest
			 * transaction id.
			 */
			for (i = killid + 1; i < nlockers; i++)
				if (ISSET_MAP(deadlock, i) &&
				    idmap[i].id > idmap[killid].id)
					killid = i;
			break;
		default:
			killid = BAD_KILLID;
			ret = EINVAL;
		}

		/* Kill the locker with lockid idmap[killid]. */
		if (dbenv->db_verbose != 0 && killid != BAD_KILLID)
			__db_err(dbenv, "Aborting locker %lx",
			    (u_long)idmap[killid].id);

		if (killid != BAD_KILLID &&
		    (ret = __dd_abort(dbenv, &idmap[killid])) != 0)
			__db_err(dbenv,
			    "warning: unable to abort locker %lx",
			    (u_long)idmap[killid].id);
	}
	free(bitmap);
	free(idmap);

	return (ret);
}
/*
* ========================================================================
* Utilities
*/
/*
 * __dd_build --
 *	Take a snapshot of the lock region and turn it into a waits-for
 *	matrix for the deadlock detector.
 *
 * dbenv:    environment whose lock table (dbenv->lk_info) is scanned.
 * bmp:      out: the matrix.  Row r (nentries words long) has bit c set
 *	     when the locker with detector id r waits for the locker
 *	     with detector id c.
 * nlockers: out: number of rows/columns in the matrix.  This is the
 *	     padded slot count (region locker count plus 10), not the
 *	     number of live lockers; unused slots have valid == 0 in
 *	     idmap.  Callers derive the row length from this value, so
 *	     it has to match the count used for the allocation here.
 * idmap:    out: per-slot locker information, indexed by detector id.
 *
 * Returns 0 on success or ENOMEM if an allocation fails.  When the
 * region has no lockers, *nlockers is set to 0, nothing is allocated and
 * *bmp / *idmap are left untouched.  Otherwise the caller owns *bmp and
 * *idmap and releases them with free().
 */
static int
__dd_build(dbenv, bmp, nlockers, idmap)
	DB_ENV *dbenv;
	u_int32_t **bmp;
	int *nlockers;
	locker_info **idmap;
{
	DB_LOCKTAB *lt;
	DB_LOCKOBJ *op, *lockerp;
	struct __db_lock *lp;
	u_int32_t *bitmap, count, *entryp, i, id, nentries, *tmpmap;
	locker_info *id_array;
	int is_first, ret;

	lt = dbenv->lk_info;

	/*
	 * We'll check how many lockers there are, add a few more in for
	 * good measure and then allocate all the structures.  Then we'll
	 * verify that we have enough room when we go back in and get the
	 * mutex the second time.
	 */
	LOCK_LOCKREGION(lt);

	/* The region lock is held whenever control reaches this label. */
retry:	count = lt->region->nlockers;
	lt->region->need_dd = 0;
	UNLOCK_LOCKREGION(lt);

	if (count == 0) {
		*nlockers = 0;
		return (0);
	}

	if (dbenv->db_verbose)
		__db_err(dbenv, "%lu lockers", (u_long)count);

	count += 10;
	nentries = ALIGN(count, 32) / 32;

	/*
	 * Allocate enough space for a count by count bitmap matrix.
	 *
	 * XXX
	 * We can probably save the malloc's between iterations just
	 * reallocing if necessary because count grew by too much.
	 */
	if ((bitmap = (u_int32_t *)calloc((size_t)count,
	    sizeof(u_int32_t) * nentries)) == NULL) {
		__db_err(dbenv, "%s", strerror(ENOMEM));
		return (ENOMEM);
	}

	/* Scratch row holding the set of holders of the current object. */
	if ((tmpmap =
	    (u_int32_t *)calloc(sizeof(u_int32_t), nentries)) == NULL) {
		__db_err(dbenv, "%s", strerror(ENOMEM));
		free(bitmap);
		return (ENOMEM);
	}

	if ((id_array = (locker_info *)calloc((size_t)count,
	    sizeof(locker_info))) == NULL) {
		__db_err(dbenv, "%s", strerror(ENOMEM));
		free(bitmap);
		free(tmpmap);
		return (ENOMEM);
	}

	/*
	 * Now go back in and actually fill in the matrix.  If the number
	 * of lockers outgrew our padding while the region was unlocked,
	 * throw everything away and start over (still holding the lock).
	 */
	LOCK_LOCKREGION(lt);
	if (lt->region->nlockers > count) {
		free(bitmap);
		free(tmpmap);
		free(id_array);
		goto retry;
	}

	/*
	 * First we go through and assign each locker a deadlock detector id.
	 * Note that we fill in the idmap in the next loop since that's the
	 * only place where we conveniently have both the deadlock id and the
	 * actual locker.
	 */
	for (id = 0, i = 0; i < lt->region->table_size; i++)
		for (op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
		    op != NULL; op = SH_TAILQ_NEXT(op, links, __db_lockobj))
			if (op->type == DB_LOCK_LOCKER)
				op->dd_id = id++;

	/*
	 * We go through the hash table and find each object.  For each object,
	 * we traverse the waiters list and add an entry in the waitsfor matrix
	 * for each waiter/holder combination.
	 */
	for (i = 0; i < lt->region->table_size; i++) {
		for (op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
		    op != NULL; op = SH_TAILQ_NEXT(op, links, __db_lockobj)) {
			if (op->type != DB_LOCK_OBJTYPE)
				continue;
			CLEAR_MAP(tmpmap, nentries);

			/*
			 * First we go through and create a bit map that
			 * represents all the holders of this object.
			 *
			 * The lookup result goes into ret, as in the
			 * loops below, rather than overwriting errno.
			 */
			for (lp = SH_TAILQ_FIRST(&op->holders, __db_lock);
			    lp != NULL;
			    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
				if ((ret = __lock_getobj(lt, lp->holder,
				    NULL, DB_LOCK_LOCKER, &lockerp)) != 0) {
					__db_err(dbenv,
					    "warning unable to find object");
					continue;
				}
				id_array[lockerp->dd_id].id = lp->holder;
				id_array[lockerp->dd_id].valid = 1;

				/*
				 * If the holder has already been aborted, then
				 * we should ignore it for now.
				 */
				if (lp->status == DB_LSTAT_HELD)
					SET_MAP(tmpmap, lockerp->dd_id);
			}

			/*
			 * Next, for each waiter, we set its row in the matrix
			 * equal to the map of holders we set up above.
			 */
			for (is_first = 1,
			    lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
			    lp != NULL;
			    is_first = 0,
			    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
				if ((ret = __lock_getobj(lt,
				    lp->holder, NULL, DB_LOCK_LOCKER, &lockerp))
				    != 0) {
					__db_err(dbenv,
					    "warning unable to find object");
					continue;
				}
				id_array[lockerp->dd_id].id = lp->holder;
				id_array[lockerp->dd_id].valid = 1;

				/*
				 * If the transaction is pending abortion, then
				 * ignore it on this iteration.
				 */
				if (lp->status != DB_LSTAT_WAITING)
					continue;

				entryp = bitmap + (nentries * lockerp->dd_id);
				OR_MAP(entryp, tmpmap, nentries);

				/*
				 * If this is the first waiter on the queue,
				 * then we remove the waitsfor relationship
				 * with oneself.  However, if it's anywhere
				 * else on the queue, then we have to keep
				 * it and we have an automatic deadlock.
				 */
				if (is_first)
					CLR_MAP(entryp, lockerp->dd_id);
			}
		}
	}

	/* Now for each locker; record its last lock. */
	for (id = 0; id < count; id++) {
		if (!id_array[id].valid)
			continue;
		if ((ret = __lock_getobj(lt,
		    id_array[id].id, NULL, DB_LOCK_LOCKER, &lockerp)) != 0) {
			__db_err(dbenv,
			    "No locks for locker %lu", (u_long)id_array[id].id);
			continue;
		}
		lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
		if (lp != NULL)
			id_array[id].last_lock = LOCK_TO_OFFSET(lt, lp);
	}

	/* Pass complete, reset the deadlock detector bit. */
	lt->region->need_dd = 0;
	UNLOCK_LOCKREGION(lt);

	/*
	 * Now we can release everything except the bitmap matrix that we
	 * created.
	 *
	 * The loop above always runs id up to count, so the value reported
	 * here has always been the padded slot count; say so explicitly.
	 */
	*nlockers = count;
	*idmap = id_array;
	*bmp = bitmap;
	free(tmpmap);
	return (0);
}
/*
 * __dd_find --
 *	Search the waits-for matrix for a locker that ends up waiting on
 *	itself.
 *
 * bmp:      the matrix; each row is ALIGN(nlockers, 32) / 32 words long.
 * idmap:    per-row locker information; rows whose valid flag is zero
 *	     are skipped.
 * nlockers: number of rows (and columns) in the matrix.
 *
 * Rows are updated in place: every row that a locker waits on is OR'ed
 * into that locker's own row as the scan proceeds.  Returns a pointer to
 * the first row found with its own bit set, or NULL if there is none.
 */
static u_int32_t *
__dd_find(bmp, idmap, nlockers)
	u_int32_t *bmp;
	locker_info *idmap;
	u_int32_t nlockers;
{
	u_int32_t holder, rowlen, waiter, *other, *row;

	rowlen = ALIGN(nlockers, 32) / 32;

	waiter = 0;
	row = bmp;
	while (waiter < nlockers) {
		if (idmap[waiter].valid) {
			for (holder = 0; holder < nlockers; ++holder) {
				/* Not waiting on this locker: nothing to merge. */
				if (!ISSET_MAP(row, holder))
					continue;

				/* Pull in everything that locker waits on. */
				other = bmp + (rowlen * holder);
				OR_MAP(row, other, rowlen);

				/* Waiting on ourselves means a cycle. */
				if (ISSET_MAP(row, waiter))
					return (row);
			}
		}
		++waiter;
		row += rowlen;
	}
	return (NULL);
}
/*
 * __dd_abort --
 *	Abort the lock a deadlocked locker is currently waiting on.
 *
 * dbenv: environment whose lock table (dbenv->lk_info) is modified.
 * info:  the victim, as recorded by __dd_build.
 *
 * The abort only happens if the first lock on the locker's heldby list
 * is still the one recorded in info->last_lock and is still in
 * DB_LSTAT_WAITING state; otherwise nothing is changed.
 *
 * Returns the error from __lock_getobj if the locker cannot be found,
 * and 0 otherwise, whether or not a lock was actually aborted.
 */
static int
__dd_abort(dbenv, info)
	DB_ENV *dbenv;
	locker_info *info;
{
	DB_LOCKTAB *lt;
	DB_LOCKOBJ *lockerp, *sh_obj;
	struct __db_lock *lockp;
	int ret;

	lt = dbenv->lk_info;
	LOCK_LOCKREGION(lt);

	/* Find the locker's last lock. */
	if ((ret =
	    __lock_getobj(lt, info->id, NULL, DB_LOCK_LOCKER, &lockerp)) != 0)
		goto out;

	/*
	 * Test for an empty list before handing the pointer to
	 * LOCK_TO_OFFSET, so that a NULL lock is never used to compute
	 * an offset.
	 */
	lockp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
	if (lockp == NULL ||
	    LOCK_TO_OFFSET(lt, lockp) != info->last_lock ||
	    lockp->status != DB_LSTAT_WAITING)
		goto out;

	/* Abort lock, take it off list, and wake up this lock. */
	lockp->status = DB_LSTAT_ABORTED;
	lt->region->ndeadlocks++;
	SH_LIST_REMOVE(lockp, locker_links, __db_lock);
	sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
	SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
	(void)__db_mutex_unlock(&lockp->mutex, lt->fd);

	ret = 0;

out:	UNLOCK_LOCKREGION(lt);
	return (ret);
}
#ifdef DEBUG
/*
 * __dd_debug --
 *	Report the waits-for matrix through __db_err, one line per valid
 *	locker: the waiter's id, the ids of the lockers it waits on and
 *	finally its last_lock value.
 *
 * dbenv:    environment used for error reporting.
 * idmap:    per-row locker information; rows with valid == 0 are skipped.
 * bitmap:   the waits-for matrix, ALIGN(nlockers, 32) / 32 words per row.
 * nlockers: number of rows (and columns) in the matrix.
 *
 * Each line is assembled by appending at a running offset; the buffer is
 * never passed to sprintf as both destination and argument, and it is
 * handed to __db_err as data rather than as the format string.
 */
static void
__dd_debug(dbenv, idmap, bitmap, nlockers)
	DB_ENV *dbenv;
	locker_info *idmap;
	u_int32_t *bitmap;
	int nlockers;
{
	u_int32_t *mymap;
	int i, j, len, nentries;
	char *msgbuf;

	__db_err(dbenv, "Waitsfor array");
	__db_err(dbenv, "waiter\twaiting on");

	/*
	 * Alloc space to print 10 bytes per item waited on.  Each id is a
	 * 32-bit value printed as " %lx" (at most 9 bytes); the extra 64
	 * bytes cover the waiter prefix, the trailing last_lock field and
	 * the terminating NUL.
	 */
	if ((msgbuf = (char *)malloc((nlockers + 1) * 10 + 64)) == NULL) {
		errno = ENOMEM;
		__db_err(dbenv, "%s", strerror(errno));
		return;
	}

	nentries = ALIGN(nlockers, 32) / 32;
	for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nentries) {
		if (!idmap[i].valid)
			continue;

		/* Waiter. */
		len = sprintf(msgbuf, "%lx\t\t", (u_long)idmap[i].id);

		/* Everyone the waiter is waiting on. */
		for (j = 0; j < nlockers; j++)
			if (ISSET_MAP(mymap, j))
				len += sprintf(msgbuf + len,
				    " %lx", (u_long)idmap[j].id);

		(void)sprintf(msgbuf + len,
		    " %lu", (u_long)idmap[i].last_lock);
		__db_err(dbenv, "%s", msgbuf);
	}

	free(msgbuf);
}
#endif

103
db2/lock/lock_util.c Normal file
View File

@ -0,0 +1,103 @@
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
/*
 * Identification string for this source file, beginning with the "@(#)"
 * marker; compiled out when lint is defined.
 */
static const char sccsid[] = "@(#)lock_util.c 10.4 (Sleepycat) 7/22/97";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#include "shqueue.h"
#include "db_page.h"
#include "db_shash.h"
#include "hash.h"
#include "lock.h"
/*
 * __lock_cmp --
 *	Equality test between a DBT about to be entered into the lock
 *	hash table and an object that is already in it.
 *
 * The result is 1 when lock_obj is of type DB_LOCK_OBJTYPE and its
 * stored bytes match the DBT in both length and content, and 0 in every
 * other case.  Because it only answers equal / not-equal it cannot be
 * used as a sort function; it is meant purely as a hash comparison
 * function.
 *
 * PUBLIC: int __lock_cmp __P((DBT *, DB_LOCKOBJ *));
 */
int
__lock_cmp(dbt, lock_obj)
	DBT *dbt;
	DB_LOCKOBJ *lock_obj;
{
	void *stored;

	/* Objects of any other type never match a DBT. */
	if (lock_obj->type != DB_LOCK_OBJTYPE)
		return (0);

	stored = SH_DBT_PTR(&lock_obj->lockobj);

	/* Different lengths: no need to look at the bytes. */
	if (dbt->size != lock_obj->lockobj.size)
		return (0);

	return (memcmp(dbt->data, stored, dbt->size) == 0);
}
/*
 * __lock_locker_cmp --
 *	Equality test between a locker id and an object in the lock hash
 *	table.  Returns 1 when lock_obj is of type DB_LOCK_LOCKER and the
 *	first sizeof(u_int32_t) bytes of its stored data equal the id,
 *	0 otherwise.
 *
 * PUBLIC: int __lock_locker_cmp __P((u_int32_t, DB_LOCKOBJ *));
 */
int
__lock_locker_cmp(locker, lock_obj)
	u_int32_t locker;
	DB_LOCKOBJ *lock_obj;
{
	void *stored_id;

	if (lock_obj->type == DB_LOCK_LOCKER) {
		stored_id = SH_DBT_PTR(&lock_obj->lockobj);
		if (memcmp(&locker, stored_id, sizeof(u_int32_t)) == 0)
			return (1);
	}
	return (0);
}
/*
 * __lock_ohash --
 *	Hash a DBT by running __ham_func5 over its data bytes; the result
 *	is returned as an int.
 *
 * PUBLIC: int __lock_ohash __P((DBT *));
 */
int
__lock_ohash(dbt)
	DBT *dbt;
{
	int bucket;

	bucket = __ham_func5(dbt->data, dbt->size);
	return (bucket);
}
/*
 * __lock_locker_hash --
 *	Hash a locker id by running __ham_func5 over the bytes of the id
 *	itself.
 *
 * PUBLIC: u_int32_t __lock_locker_hash __P((u_int32_t));
 */
u_int32_t
__lock_locker_hash(locker)
	u_int32_t locker;
{
	u_int32_t bucket;

	bucket = __ham_func5(&locker, sizeof(locker));
	return (bucket);
}
/*
 * __lock_lhash --
 *	Hash an object already stored in the lock table by running
 *	__ham_func5 over the lockobj.size bytes of its stored data.
 *
 * PUBLIC: u_int32_t __lock_lhash __P((DB_LOCKOBJ *));
 */
u_int32_t
__lock_lhash(lock_obj)
	DB_LOCKOBJ *lock_obj;
{
	return (__ham_func5(
	    SH_DBT_PTR(&lock_obj->lockobj), lock_obj->lockobj.size));
}

Some files were not shown because too many files have changed in this diff Show More