/*
 * tmem.h
 *
 * Transcendent memory
 *
 * Copyright (c) 2009-2012, Dan Magenheimer, Oracle Corp.
 */

#ifndef _TMEM_H_
#define _TMEM_H_

#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/hash.h>
#include <linux/atomic.h>

/*
 * These are defined by the Xen<->Linux ABI so should remain consistent.
 */
#define TMEM_POOL_PERSIST 1
#define TMEM_POOL_SHARED 2
#define TMEM_POOL_PRECOMPRESSED 4
#define TMEM_POOL_PAGESIZE_SHIFT 4
#define TMEM_POOL_PAGESIZE_MASK 0xf
#define TMEM_POOL_RESERVED_BITS 0x00ffff00

/*
 * sentinels have proven very useful for debugging but can be removed
 * or disabled before final merge.
 */
#undef SENTINELS
#ifdef SENTINELS
#define DECL_SENTINEL uint32_t sentinel;
#define SET_SENTINEL(_x, _y) (_x->sentinel = _y##_SENTINEL)
#define INVERT_SENTINEL(_x, _y) (_x->sentinel = ~_y##_SENTINEL)
#define ASSERT_SENTINEL(_x, _y) WARN_ON(_x->sentinel != _y##_SENTINEL)
#define ASSERT_INVERTED_SENTINEL(_x, _y) WARN_ON(_x->sentinel != ~_y##_SENTINEL)
#else
#define DECL_SENTINEL
#define SET_SENTINEL(_x, _y) do { } while (0)
#define INVERT_SENTINEL(_x, _y) do { } while (0)
#define ASSERT_SENTINEL(_x, _y) do { } while (0)
#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0)
#endif
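
/*
 * Example usage (sketch; POOL is a hypothetical sentinel tag whose
 * POOL_SENTINEL value a user of these macros would define):
 *
 *	SET_SENTINEL(pool, POOL);	at allocation
 *	ASSERT_SENTINEL(pool, POOL);	on each access
 *	INVERT_SENTINEL(pool, POOL);	at free, to catch use-after-free
 */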

#define ASSERT_SPINLOCK(_l) lockdep_assert_held(_l)

/*
 * A pool is the highest-level data structure managed by tmem and
 * usually corresponds to a large independent set of pages such as
 * a filesystem. Each pool has an id, and certain attributes and counters.
 * It also contains a set of hash buckets, each of which contains an rbtree
 * of objects and a lock to manage concurrency within the pool.
 */

#define TMEM_HASH_BUCKET_BITS 8
#define TMEM_HASH_BUCKETS (1<<TMEM_HASH_BUCKET_BITS)

struct tmem_hashbucket {
	struct rb_root obj_rb_root;
	spinlock_t lock;
};

struct tmem_pool {
	void *client; /* "up" for some clients, avoids table lookup */
	struct list_head pool_list;
	uint32_t pool_id;
	bool persistent;
	bool shared;
	atomic_t obj_count;
	atomic_t refcount;
	struct tmem_hashbucket hashbucket[TMEM_HASH_BUCKETS];
	DECL_SENTINEL
};
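
/*
 * Sketch of the flags-to-attributes mapping performed at pool
 * creation (an assumption based on the flag definitions above; the
 * authoritative version lives in tmem_new_pool(), declared below):
 *
 *	pool->persistent = flags & TMEM_POOL_PERSIST;
 *	pool->shared = flags & TMEM_POOL_SHARED;
 */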

#define is_persistent(_p) (_p->persistent)
#define is_ephemeral(_p) (!(_p->persistent))

/*
 * An object id ("oid") is large: 192 bits (to ensure, for example, that
 * files in a modern filesystem can be uniquely identified).
 */

struct tmem_oid {
	uint64_t oid[3];
};

static inline void tmem_oid_set_invalid(struct tmem_oid *oidp)
{
	oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
}

static inline bool tmem_oid_valid(struct tmem_oid *oidp)
{
	return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL ||
		oidp->oid[2] != -1UL;
}

static inline int tmem_oid_compare(struct tmem_oid *left,
				   struct tmem_oid *right)
{
	int ret;

	if (left->oid[2] == right->oid[2]) {
		if (left->oid[1] == right->oid[1]) {
			if (left->oid[0] == right->oid[0])
				ret = 0;
			else if (left->oid[0] < right->oid[0])
				ret = -1;
			else
				ret = 1;
		} else if (left->oid[1] < right->oid[1])
			ret = -1;
		else
			ret = 1;
	} else if (left->oid[2] < right->oid[2])
		ret = -1;
	else
		ret = 1;
	return ret;
}

static inline unsigned tmem_oid_hash(struct tmem_oid *oidp)
{
	return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2],
			 TMEM_HASH_BUCKET_BITS);
}
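
/*
 * Sketch of how the hash is used (hypothetical helper, for
 * illustration only): an oid hashes to one of the pool's
 * TMEM_HASH_BUCKETS buckets, whose lock serializes access to the
 * rbtree of objects within it.
 */
static inline struct tmem_hashbucket *tmem_oid_to_hashbucket(
					struct tmem_pool *pool,
					struct tmem_oid *oidp)
{
	return &pool->hashbucket[tmem_oid_hash(oidp)];
}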

#ifdef CONFIG_RAMSTER
struct tmem_xhandle {
	uint8_t client_id;
	uint8_t xh_data_cksum;
	uint16_t xh_data_size;
	uint16_t pool_id;
	struct tmem_oid oid;
	uint32_t index;
	void *extra;
};

static inline struct tmem_xhandle tmem_xhandle_fill(uint16_t client_id,
						    struct tmem_pool *pool,
						    struct tmem_oid *oidp,
						    uint32_t index)
{
	struct tmem_xhandle xh;

	xh.client_id = client_id;
	xh.xh_data_cksum = (uint8_t)-1;
	xh.xh_data_size = (uint16_t)-1;
	xh.pool_id = pool->pool_id;
	xh.oid = *oidp;
	xh.index = index;
	return xh;
}
#endif

/*
 * A tmem_obj contains an identifier (oid), pointers to the parent
 * pool and the rb_tree to which it belongs, counters, and an ordered
 * set of pampds, structured in a radix-tree-like tree. The intermediate
 * nodes of the tree are called tmem_objnodes.
 */

struct tmem_objnode;

struct tmem_obj {
	struct tmem_oid oid;
	struct tmem_pool *pool;
	struct rb_node rb_tree_node;
	struct tmem_objnode *objnode_tree_root;
	unsigned int objnode_tree_height;
	unsigned long objnode_count;
	long pampd_count;
#ifdef CONFIG_RAMSTER
	/*
	 * In the current ramster design, all pages belonging to an
	 * object reside on the same remote node; "extra" records the
	 * number of that node so a flush-object operation can specify it.
	 */
	void *extra; /* for private use by pampd implementation */
#endif
	DECL_SENTINEL
};

#define OBJNODE_TREE_MAP_SHIFT 6
#define OBJNODE_TREE_MAP_SIZE (1UL << OBJNODE_TREE_MAP_SHIFT)
#define OBJNODE_TREE_MAP_MASK (OBJNODE_TREE_MAP_SIZE-1)
#define OBJNODE_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define OBJNODE_TREE_MAX_PATH \
	(OBJNODE_TREE_INDEX_BITS/OBJNODE_TREE_MAP_SHIFT + 2)

struct tmem_objnode {
	struct tmem_obj *obj;
	DECL_SENTINEL
	void *slots[OBJNODE_TREE_MAP_SIZE];
	unsigned int slots_in_use;
};
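
/*
 * Worked example (64-bit): OBJNODE_TREE_INDEX_BITS = 64, and each
 * objnode level resolves OBJNODE_TREE_MAP_SHIFT (6) bits of the index
 * via its 2^6 = 64 slots, so OBJNODE_TREE_MAX_PATH = 64/6 + 2 = 12
 * levels, enough to cover any unsigned long index.
 */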

struct tmem_handle {
	struct tmem_oid oid; /* 24 bytes */
	uint32_t index;
	uint16_t pool_id;
	uint16_t client_id;
};

/* pampd abstract datatype methods provided by the PAM implementation */
struct tmem_pamops {
	void (*create_finish)(void *, bool);
	int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *,
			struct tmem_oid *, uint32_t);
	int (*get_data_and_free)(char *, size_t *, bool, void *,
				 struct tmem_pool *, struct tmem_oid *,
				 uint32_t);
	void (*free)(void *, struct tmem_pool *,
		     struct tmem_oid *, uint32_t, bool);
#ifdef CONFIG_RAMSTER
	void (*new_obj)(struct tmem_obj *);
	void (*free_obj)(struct tmem_pool *, struct tmem_obj *, bool);
	void *(*repatriate_preload)(void *, struct tmem_pool *,
				    struct tmem_oid *, uint32_t, bool *);
	int (*repatriate)(void *, void *, struct tmem_pool *,
			  struct tmem_oid *, uint32_t, bool, void *);
	bool (*is_remote)(void *);
	int (*replace_in_obj)(void *, struct tmem_obj *);
#endif
};
extern void tmem_register_pamops(struct tmem_pamops *m);

/* memory allocation methods provided by the host implementation */
struct tmem_hostops {
	struct tmem_obj *(*obj_alloc)(struct tmem_pool *);
	void (*obj_free)(struct tmem_obj *, struct tmem_pool *);
	struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *);
	void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *);
};
extern void tmem_register_hostops(struct tmem_hostops *m);
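
/*
 * Registration sketch (function names are hypothetical): a host such
 * as a zcache-like driver supplies its allocators once at init time:
 *
 *	static struct tmem_hostops my_hostops = {
 *		.obj_alloc = my_obj_alloc,
 *		.obj_free = my_obj_free,
 *		.objnode_alloc = my_objnode_alloc,
 *		.objnode_free = my_objnode_free,
 *	};
 *	...
 *	tmem_register_hostops(&my_hostops);
 */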

/* core tmem accessor functions */
extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
		    bool, void *);
extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
		    char *, size_t *, bool, int);
extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
			   uint32_t index);
extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
extern int tmem_destroy_pool(struct tmem_pool *);
extern void tmem_new_pool(struct tmem_pool *, uint32_t);
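
/*
 * Typical call sequence (illustrative sketch only; the argument names
 * below are assumptions matching the declared types, not taken from
 * this header):
 *
 *	tmem_new_pool(pool, TMEM_POOL_PERSIST);
 *	ret = tmem_put(pool, &oid, index, raw, pampd);
 *	ret = tmem_get(pool, &oid, index, data, &size, raw, get_and_free);
 *	ret = tmem_flush_page(pool, &oid, index);
 *	ret = tmem_flush_object(pool, &oid);
 *	ret = tmem_destroy_pool(pool);
 */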

#ifdef CONFIG_RAMSTER
extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
			void *);
extern void *tmem_localify_get_pampd(struct tmem_pool *, struct tmem_oid *,
				     uint32_t index, struct tmem_obj **,
				     void **);
extern void tmem_localify_finish(struct tmem_obj *, uint32_t index,
				 void *, void *, bool);
#endif

#endif /* _TMEM_H_ */