libceph: a major OSD client update

This is a major sync up, up to ~Jewel.  The highlights are:

- per-session request trees (vs a global per-client tree)
- per-session locking (vs a global per-client rwlock)
- homeless OSD session
- no ad-hoc global per-client lists
- support for pool quotas
- foundation for watch/notify v2 support
- foundation for map check (pool deletion detection) support

The switchover is incomplete: lingering requests can be setup and
teared down but aren't ever reestablished.  This functionality is
restored with the introduction of the new lingering infrastructure
(ceph_osd_linger_request, linger_work, etc) in a later commit.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Ilya Dryomov 2016-04-28 16:07:26 +02:00
parent 9dd2845ccb
commit 5aea3dcd50
5 changed files with 601 additions and 629 deletions

View File

@ -193,12 +193,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
if (copy_from_user(&dl, arg, sizeof(dl)))
return -EFAULT;
down_read(&osdc->map_sem);
down_read(&osdc->lock);
r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
&dl.object_no, &dl.object_offset,
&olen);
if (r < 0) {
up_read(&osdc->map_sem);
up_read(&osdc->lock);
return -EIO;
}
dl.file_offset -= dl.object_offset;
@ -217,7 +217,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
r = ceph_object_locator_to_pg(osdc->osdmap, &oid, &oloc, &pgid);
if (r < 0) {
up_read(&osdc->map_sem);
up_read(&osdc->lock);
return r;
}
@ -230,7 +230,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
} else {
memset(&dl.osd_addr, 0, sizeof(dl.osd_addr));
}
up_read(&osdc->map_sem);
up_read(&osdc->lock);
/* send result back to user */
if (copy_to_user(arg, &dl, sizeof(dl)))

View File

@ -75,7 +75,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
char buf[128];
dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
down_read(&osdc->map_sem);
down_read(&osdc->lock);
pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
if (pool_name) {
size_t len = strlen(pool_name);
@ -107,7 +107,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
ret = -ERANGE;
}
}
up_read(&osdc->map_sem);
up_read(&osdc->lock);
return ret;
}
@ -141,13 +141,13 @@ static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
const char *pool_name;
down_read(&osdc->map_sem);
down_read(&osdc->lock);
pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
if (pool_name)
ret = snprintf(val, size, "%s", pool_name);
else
ret = snprintf(val, size, "%lld", (unsigned long long)pool);
up_read(&osdc->map_sem);
up_read(&osdc->lock);
return ret;
}

View File

@ -33,12 +33,13 @@ struct ceph_osd {
int o_incarnation;
struct rb_node o_node;
struct ceph_connection o_con;
struct list_head o_requests;
struct rb_root o_requests;
struct list_head o_linger_requests;
struct list_head o_osd_lru;
struct ceph_auth_handshake o_auth;
unsigned long lru_ttl;
struct list_head o_keepalive_item;
struct mutex lock;
};
#define CEPH_OSD_SLAB_OPS 2
@ -144,8 +145,6 @@ struct ceph_osd_request_target {
struct ceph_osd_request {
u64 r_tid; /* unique for this client */
struct rb_node r_node;
struct list_head r_req_lru_item;
struct list_head r_osd_item;
struct list_head r_linger_item;
struct list_head r_linger_osd_item;
struct ceph_osd *r_osd;
@ -219,19 +218,16 @@ struct ceph_osd_client {
struct ceph_client *client;
struct ceph_osdmap *osdmap; /* current map */
struct rw_semaphore map_sem;
struct rw_semaphore lock;
struct mutex request_mutex;
struct rb_root osds; /* osds */
struct list_head osd_lru; /* idle osds */
spinlock_t osd_lru_lock;
u64 last_tid; /* tid of last request */
struct rb_root requests; /* pending requests */
struct list_head req_lru; /* in-flight lru */
struct list_head req_unsent; /* unsent/need-resend queue */
struct list_head req_notarget; /* map to no osd */
struct list_head req_linger; /* lingering requests */
int num_requests;
struct ceph_osd homeless_osd;
atomic64_t last_tid; /* tid of last request */
atomic_t num_requests;
atomic_t num_homeless;
struct delayed_work timeout_work;
struct delayed_work osds_timeout_work;
#ifdef CONFIG_DEBUG_FS

View File

@ -182,21 +182,39 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req)
seq_putc(s, '\n');
}
static void dump_requests(struct seq_file *s, struct ceph_osd *osd)
{
struct rb_node *n;
mutex_lock(&osd->lock);
for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) {
struct ceph_osd_request *req =
rb_entry(n, struct ceph_osd_request, r_node);
dump_request(s, req);
}
mutex_unlock(&osd->lock);
}
static int osdc_show(struct seq_file *s, void *pp)
{
struct ceph_client *client = s->private;
struct ceph_osd_client *osdc = &client->osdc;
struct rb_node *p;
struct rb_node *n;
mutex_lock(&osdc->request_mutex);
for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
struct ceph_osd_request *req;
down_read(&osdc->lock);
seq_printf(s, "REQUESTS %d homeless %d\n",
atomic_read(&osdc->num_requests),
atomic_read(&osdc->num_homeless));
for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
req = rb_entry(p, struct ceph_osd_request, r_node);
dump_request(s, req);
dump_requests(s, osd);
}
mutex_unlock(&osdc->request_mutex);
dump_requests(s, &osdc->homeless_osd);
up_read(&osdc->lock);
return 0;
}

File diff suppressed because it is too large Load Diff