633 lines
16 KiB
C
633 lines
16 KiB
C
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Based on jffs2 zlib code:
 * Copyright © 2001-2007 Red Hat, Inc.
 * Created by David Woodhouse <dwmw2@infradead.org>
 */
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/zlib.h>
|
|
#include <linux/zutil.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/init.h>
|
|
#include <linux/err.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/bio.h>
|
|
#include "compression.h"
|
|
|
|
/* Plan: call deflate() with avail_in == *sourcelen,
 *	avail_out = *dstlen - 12 and flush == Z_FINISH.
 *	If it doesn't manage to finish, call it again with
 *	avail_in == 0 and avail_out set to the remaining 12
 *	bytes for it to clean up.
 * Q: Is 12 bytes sufficient?
 */
|
|
/*
 * Output bytes held back for zlib to finish the stream, per the plan in
 * the comment above.
 * NOTE(review): no code visible in this file references this macro --
 * confirm whether it is still needed.
 */
#define STREAM_END_SPACE 12
|
|
|
|
struct workspace {
|
|
z_stream inf_strm;
|
|
z_stream def_strm;
|
|
char *buf;
|
|
struct list_head list;
|
|
};
|
|
|
|
static LIST_HEAD(idle_workspace);
|
|
static DEFINE_SPINLOCK(workspace_lock);
|
|
static unsigned long num_workspace;
|
|
static atomic_t alloc_workspace = ATOMIC_INIT(0);
|
|
static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
|
|
|
|
/*
|
|
* this finds an available zlib workspace or allocates a new one
|
|
* NULL or an ERR_PTR is returned if things go bad.
|
|
*/
|
|
static struct workspace *find_zlib_workspace(void)
|
|
{
|
|
struct workspace *workspace;
|
|
int ret;
|
|
int cpus = num_online_cpus();
|
|
|
|
again:
|
|
spin_lock(&workspace_lock);
|
|
if (!list_empty(&idle_workspace)) {
|
|
workspace = list_entry(idle_workspace.next, struct workspace,
|
|
list);
|
|
list_del(&workspace->list);
|
|
num_workspace--;
|
|
spin_unlock(&workspace_lock);
|
|
return workspace;
|
|
|
|
}
|
|
spin_unlock(&workspace_lock);
|
|
if (atomic_read(&alloc_workspace) > cpus) {
|
|
DEFINE_WAIT(wait);
|
|
prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
|
|
if (atomic_read(&alloc_workspace) > cpus)
|
|
schedule();
|
|
finish_wait(&workspace_wait, &wait);
|
|
goto again;
|
|
}
|
|
atomic_inc(&alloc_workspace);
|
|
workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
|
|
if (!workspace) {
|
|
ret = -ENOMEM;
|
|
goto fail;
|
|
}
|
|
|
|
workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
|
|
if (!workspace->def_strm.workspace) {
|
|
ret = -ENOMEM;
|
|
goto fail;
|
|
}
|
|
workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
|
|
if (!workspace->inf_strm.workspace) {
|
|
ret = -ENOMEM;
|
|
goto fail_inflate;
|
|
}
|
|
workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
|
|
if (!workspace->buf) {
|
|
ret = -ENOMEM;
|
|
goto fail_kmalloc;
|
|
}
|
|
return workspace;
|
|
|
|
fail_kmalloc:
|
|
vfree(workspace->inf_strm.workspace);
|
|
fail_inflate:
|
|
vfree(workspace->def_strm.workspace);
|
|
fail:
|
|
kfree(workspace);
|
|
atomic_dec(&alloc_workspace);
|
|
wake_up(&workspace_wait);
|
|
return ERR_PTR(ret);
|
|
}
|
|
|
|
/*
|
|
* put a workspace struct back on the list or free it if we have enough
|
|
* idle ones sitting around
|
|
*/
|
|
static int free_workspace(struct workspace *workspace)
|
|
{
|
|
spin_lock(&workspace_lock);
|
|
if (num_workspace < num_online_cpus()) {
|
|
list_add_tail(&workspace->list, &idle_workspace);
|
|
num_workspace++;
|
|
spin_unlock(&workspace_lock);
|
|
if (waitqueue_active(&workspace_wait))
|
|
wake_up(&workspace_wait);
|
|
return 0;
|
|
}
|
|
spin_unlock(&workspace_lock);
|
|
vfree(workspace->def_strm.workspace);
|
|
vfree(workspace->inf_strm.workspace);
|
|
kfree(workspace->buf);
|
|
kfree(workspace);
|
|
|
|
atomic_dec(&alloc_workspace);
|
|
if (waitqueue_active(&workspace_wait))
|
|
wake_up(&workspace_wait);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* cleanup function for module exit
|
|
*/
|
|
static void free_workspaces(void)
|
|
{
|
|
struct workspace *workspace;
|
|
while (!list_empty(&idle_workspace)) {
|
|
workspace = list_entry(idle_workspace.next, struct workspace,
|
|
list);
|
|
list_del(&workspace->list);
|
|
vfree(workspace->def_strm.workspace);
|
|
vfree(workspace->inf_strm.workspace);
|
|
kfree(workspace->buf);
|
|
kfree(workspace);
|
|
atomic_dec(&alloc_workspace);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* given an address space and start/len, compress the bytes.
|
|
*
|
|
* pages are allocated to hold the compressed result and stored
|
|
* in 'pages'
|
|
*
|
|
* out_pages is used to return the number of pages allocated. There
|
|
* may be pages allocated even if we return an error
|
|
*
|
|
* total_in is used to return the number of bytes actually read. It
|
|
* may be smaller then len if we had to exit early because we
|
|
* ran out of room in the pages array or because we cross the
|
|
* max_out threshold.
|
|
*
|
|
* total_out is used to return the total number of compressed bytes
|
|
*
|
|
* max_out tells us the max number of bytes that we're allowed to
|
|
* stuff into pages
|
|
*/
|
|
int btrfs_zlib_compress_pages(struct address_space *mapping,
|
|
u64 start, unsigned long len,
|
|
struct page **pages,
|
|
unsigned long nr_dest_pages,
|
|
unsigned long *out_pages,
|
|
unsigned long *total_in,
|
|
unsigned long *total_out,
|
|
unsigned long max_out)
|
|
{
|
|
int ret;
|
|
struct workspace *workspace;
|
|
char *data_in;
|
|
char *cpage_out;
|
|
int nr_pages = 0;
|
|
struct page *in_page = NULL;
|
|
struct page *out_page = NULL;
|
|
int out_written = 0;
|
|
int in_read = 0;
|
|
unsigned long bytes_left;
|
|
|
|
*out_pages = 0;
|
|
*total_out = 0;
|
|
*total_in = 0;
|
|
|
|
workspace = find_zlib_workspace();
|
|
if (IS_ERR(workspace))
|
|
return -1;
|
|
|
|
if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
|
|
printk(KERN_WARNING "deflateInit failed\n");
|
|
ret = -1;
|
|
goto out;
|
|
}
|
|
|
|
workspace->def_strm.total_in = 0;
|
|
workspace->def_strm.total_out = 0;
|
|
|
|
in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
|
|
data_in = kmap(in_page);
|
|
|
|
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
|
|
cpage_out = kmap(out_page);
|
|
pages[0] = out_page;
|
|
nr_pages = 1;
|
|
|
|
workspace->def_strm.next_in = data_in;
|
|
workspace->def_strm.next_out = cpage_out;
|
|
workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
|
|
workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
|
|
|
|
out_written = 0;
|
|
in_read = 0;
|
|
|
|
while (workspace->def_strm.total_in < len) {
|
|
ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
|
|
if (ret != Z_OK) {
|
|
printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
|
|
ret);
|
|
zlib_deflateEnd(&workspace->def_strm);
|
|
ret = -1;
|
|
goto out;
|
|
}
|
|
|
|
/* we're making it bigger, give up */
|
|
if (workspace->def_strm.total_in > 8192 &&
|
|
workspace->def_strm.total_in <
|
|
workspace->def_strm.total_out) {
|
|
ret = -1;
|
|
goto out;
|
|
}
|
|
/* we need another page for writing out. Test this
|
|
* before the total_in so we will pull in a new page for
|
|
* the stream end if required
|
|
*/
|
|
if (workspace->def_strm.avail_out == 0) {
|
|
kunmap(out_page);
|
|
if (nr_pages == nr_dest_pages) {
|
|
out_page = NULL;
|
|
ret = -1;
|
|
goto out;
|
|
}
|
|
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
|
|
cpage_out = kmap(out_page);
|
|
pages[nr_pages] = out_page;
|
|
nr_pages++;
|
|
workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
|
|
workspace->def_strm.next_out = cpage_out;
|
|
}
|
|
/* we're all done */
|
|
if (workspace->def_strm.total_in >= len)
|
|
break;
|
|
|
|
/* we've read in a full page, get a new one */
|
|
if (workspace->def_strm.avail_in == 0) {
|
|
if (workspace->def_strm.total_out > max_out)
|
|
break;
|
|
|
|
bytes_left = len - workspace->def_strm.total_in;
|
|
kunmap(in_page);
|
|
page_cache_release(in_page);
|
|
|
|
start += PAGE_CACHE_SIZE;
|
|
in_page = find_get_page(mapping,
|
|
start >> PAGE_CACHE_SHIFT);
|
|
data_in = kmap(in_page);
|
|
workspace->def_strm.avail_in = min(bytes_left,
|
|
PAGE_CACHE_SIZE);
|
|
workspace->def_strm.next_in = data_in;
|
|
}
|
|
}
|
|
workspace->def_strm.avail_in = 0;
|
|
ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
|
|
zlib_deflateEnd(&workspace->def_strm);
|
|
|
|
if (ret != Z_STREAM_END) {
|
|
ret = -1;
|
|
goto out;
|
|
}
|
|
|
|
if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
|
|
ret = -1;
|
|
goto out;
|
|
}
|
|
|
|
ret = 0;
|
|
*total_out = workspace->def_strm.total_out;
|
|
*total_in = workspace->def_strm.total_in;
|
|
out:
|
|
*out_pages = nr_pages;
|
|
if (out_page)
|
|
kunmap(out_page);
|
|
|
|
if (in_page) {
|
|
kunmap(in_page);
|
|
page_cache_release(in_page);
|
|
}
|
|
free_workspace(workspace);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* pages_in is an array of pages with compressed data.
|
|
*
|
|
* disk_start is the starting logical offset of this array in the file
|
|
*
|
|
* bvec is a bio_vec of pages from the file that we want to decompress into
|
|
*
|
|
* vcnt is the count of pages in the biovec
|
|
*
|
|
* srclen is the number of bytes in pages_in
|
|
*
|
|
* The basic idea is that we have a bio that was created by readpages.
|
|
* The pages in the bio are for the uncompressed data, and they may not
|
|
* be contiguous. They all correspond to the range of bytes covered by
|
|
* the compressed extent.
|
|
*/
|
|
int btrfs_zlib_decompress_biovec(struct page **pages_in,
|
|
u64 disk_start,
|
|
struct bio_vec *bvec,
|
|
int vcnt,
|
|
size_t srclen)
|
|
{
|
|
int ret = 0;
|
|
int wbits = MAX_WBITS;
|
|
struct workspace *workspace;
|
|
char *data_in;
|
|
size_t total_out = 0;
|
|
unsigned long page_bytes_left;
|
|
unsigned long page_in_index = 0;
|
|
unsigned long page_out_index = 0;
|
|
struct page *page_out;
|
|
unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
|
|
PAGE_CACHE_SIZE;
|
|
unsigned long buf_start;
|
|
unsigned long buf_offset;
|
|
unsigned long bytes;
|
|
unsigned long working_bytes;
|
|
unsigned long pg_offset;
|
|
unsigned long start_byte;
|
|
unsigned long current_buf_start;
|
|
char *kaddr;
|
|
|
|
workspace = find_zlib_workspace();
|
|
if (IS_ERR(workspace))
|
|
return -ENOMEM;
|
|
|
|
data_in = kmap(pages_in[page_in_index]);
|
|
workspace->inf_strm.next_in = data_in;
|
|
workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
|
|
workspace->inf_strm.total_in = 0;
|
|
|
|
workspace->inf_strm.total_out = 0;
|
|
workspace->inf_strm.next_out = workspace->buf;
|
|
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
|
|
page_out = bvec[page_out_index].bv_page;
|
|
page_bytes_left = PAGE_CACHE_SIZE;
|
|
pg_offset = 0;
|
|
|
|
/* If it's deflate, and it's got no preset dictionary, then
|
|
we can tell zlib to skip the adler32 check. */
|
|
if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
|
|
((data_in[0] & 0x0f) == Z_DEFLATED) &&
|
|
!(((data_in[0]<<8) + data_in[1]) % 31)) {
|
|
|
|
wbits = -((data_in[0] >> 4) + 8);
|
|
workspace->inf_strm.next_in += 2;
|
|
workspace->inf_strm.avail_in -= 2;
|
|
}
|
|
|
|
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
|
|
printk(KERN_WARNING "inflateInit failed\n");
|
|
ret = -1;
|
|
goto out;
|
|
}
|
|
while (workspace->inf_strm.total_in < srclen) {
|
|
ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
|
|
if (ret != Z_OK && ret != Z_STREAM_END)
|
|
break;
|
|
/*
|
|
* buf start is the byte offset we're of the start of
|
|
* our workspace buffer
|
|
*/
|
|
buf_start = total_out;
|
|
|
|
/* total_out is the last byte of the workspace buffer */
|
|
total_out = workspace->inf_strm.total_out;
|
|
|
|
working_bytes = total_out - buf_start;
|
|
|
|
/*
|
|
* start byte is the first byte of the page we're currently
|
|
* copying into relative to the start of the compressed data.
|
|
*/
|
|
start_byte = page_offset(page_out) - disk_start;
|
|
|
|
if (working_bytes == 0) {
|
|
/* we didn't make progress in this inflate
|
|
* call, we're done
|
|
*/
|
|
if (ret != Z_STREAM_END)
|
|
ret = -1;
|
|
break;
|
|
}
|
|
|
|
/* we haven't yet hit data corresponding to this page */
|
|
if (total_out <= start_byte)
|
|
goto next;
|
|
|
|
/*
|
|
* the start of the data we care about is offset into
|
|
* the middle of our working buffer
|
|
*/
|
|
if (total_out > start_byte && buf_start < start_byte) {
|
|
buf_offset = start_byte - buf_start;
|
|
working_bytes -= buf_offset;
|
|
} else {
|
|
buf_offset = 0;
|
|
}
|
|
current_buf_start = buf_start;
|
|
|
|
/* copy bytes from the working buffer into the pages */
|
|
while (working_bytes > 0) {
|
|
bytes = min(PAGE_CACHE_SIZE - pg_offset,
|
|
PAGE_CACHE_SIZE - buf_offset);
|
|
bytes = min(bytes, working_bytes);
|
|
kaddr = kmap_atomic(page_out, KM_USER0);
|
|
memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
|
|
bytes);
|
|
kunmap_atomic(kaddr, KM_USER0);
|
|
flush_dcache_page(page_out);
|
|
|
|
pg_offset += bytes;
|
|
page_bytes_left -= bytes;
|
|
buf_offset += bytes;
|
|
working_bytes -= bytes;
|
|
current_buf_start += bytes;
|
|
|
|
/* check if we need to pick another page */
|
|
if (page_bytes_left == 0) {
|
|
page_out_index++;
|
|
if (page_out_index >= vcnt) {
|
|
ret = 0;
|
|
goto done;
|
|
}
|
|
|
|
page_out = bvec[page_out_index].bv_page;
|
|
pg_offset = 0;
|
|
page_bytes_left = PAGE_CACHE_SIZE;
|
|
start_byte = page_offset(page_out) - disk_start;
|
|
|
|
/*
|
|
* make sure our new page is covered by this
|
|
* working buffer
|
|
*/
|
|
if (total_out <= start_byte)
|
|
goto next;
|
|
|
|
/* the next page in the biovec might not
|
|
* be adjacent to the last page, but it
|
|
* might still be found inside this working
|
|
* buffer. bump our offset pointer
|
|
*/
|
|
if (total_out > start_byte &&
|
|
current_buf_start < start_byte) {
|
|
buf_offset = start_byte - buf_start;
|
|
working_bytes = total_out - start_byte;
|
|
current_buf_start = buf_start +
|
|
buf_offset;
|
|
}
|
|
}
|
|
}
|
|
next:
|
|
workspace->inf_strm.next_out = workspace->buf;
|
|
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
|
|
|
|
if (workspace->inf_strm.avail_in == 0) {
|
|
unsigned long tmp;
|
|
kunmap(pages_in[page_in_index]);
|
|
page_in_index++;
|
|
if (page_in_index >= total_pages_in) {
|
|
data_in = NULL;
|
|
break;
|
|
}
|
|
data_in = kmap(pages_in[page_in_index]);
|
|
workspace->inf_strm.next_in = data_in;
|
|
tmp = srclen - workspace->inf_strm.total_in;
|
|
workspace->inf_strm.avail_in = min(tmp,
|
|
PAGE_CACHE_SIZE);
|
|
}
|
|
}
|
|
if (ret != Z_STREAM_END)
|
|
ret = -1;
|
|
else
|
|
ret = 0;
|
|
done:
|
|
zlib_inflateEnd(&workspace->inf_strm);
|
|
if (data_in)
|
|
kunmap(pages_in[page_in_index]);
|
|
out:
|
|
free_workspace(workspace);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* a less complex decompression routine. Our compressed data fits in a
|
|
* single page, and we want to read a single page out of it.
|
|
* start_byte tells us the offset into the compressed data we're interested in
|
|
*/
|
|
int btrfs_zlib_decompress(unsigned char *data_in,
|
|
struct page *dest_page,
|
|
unsigned long start_byte,
|
|
size_t srclen, size_t destlen)
|
|
{
|
|
int ret = 0;
|
|
int wbits = MAX_WBITS;
|
|
struct workspace *workspace;
|
|
unsigned long bytes_left = destlen;
|
|
unsigned long total_out = 0;
|
|
char *kaddr;
|
|
|
|
if (destlen > PAGE_CACHE_SIZE)
|
|
return -ENOMEM;
|
|
|
|
workspace = find_zlib_workspace();
|
|
if (IS_ERR(workspace))
|
|
return -ENOMEM;
|
|
|
|
workspace->inf_strm.next_in = data_in;
|
|
workspace->inf_strm.avail_in = srclen;
|
|
workspace->inf_strm.total_in = 0;
|
|
|
|
workspace->inf_strm.next_out = workspace->buf;
|
|
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
|
|
workspace->inf_strm.total_out = 0;
|
|
/* If it's deflate, and it's got no preset dictionary, then
|
|
we can tell zlib to skip the adler32 check. */
|
|
if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
|
|
((data_in[0] & 0x0f) == Z_DEFLATED) &&
|
|
!(((data_in[0]<<8) + data_in[1]) % 31)) {
|
|
|
|
wbits = -((data_in[0] >> 4) + 8);
|
|
workspace->inf_strm.next_in += 2;
|
|
workspace->inf_strm.avail_in -= 2;
|
|
}
|
|
|
|
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
|
|
printk(KERN_WARNING "inflateInit failed\n");
|
|
ret = -1;
|
|
goto out;
|
|
}
|
|
|
|
while (bytes_left > 0) {
|
|
unsigned long buf_start;
|
|
unsigned long buf_offset;
|
|
unsigned long bytes;
|
|
unsigned long pg_offset = 0;
|
|
|
|
ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
|
|
if (ret != Z_OK && ret != Z_STREAM_END)
|
|
break;
|
|
|
|
buf_start = total_out;
|
|
total_out = workspace->inf_strm.total_out;
|
|
|
|
if (total_out == buf_start) {
|
|
ret = -1;
|
|
break;
|
|
}
|
|
|
|
if (total_out <= start_byte)
|
|
goto next;
|
|
|
|
if (total_out > start_byte && buf_start < start_byte)
|
|
buf_offset = start_byte - buf_start;
|
|
else
|
|
buf_offset = 0;
|
|
|
|
bytes = min(PAGE_CACHE_SIZE - pg_offset,
|
|
PAGE_CACHE_SIZE - buf_offset);
|
|
bytes = min(bytes, bytes_left);
|
|
|
|
kaddr = kmap_atomic(dest_page, KM_USER0);
|
|
memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
|
|
kunmap_atomic(kaddr, KM_USER0);
|
|
|
|
pg_offset += bytes;
|
|
bytes_left -= bytes;
|
|
next:
|
|
workspace->inf_strm.next_out = workspace->buf;
|
|
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
|
|
}
|
|
|
|
if (ret != Z_STREAM_END && bytes_left != 0)
|
|
ret = -1;
|
|
else
|
|
ret = 0;
|
|
|
|
zlib_inflateEnd(&workspace->inf_strm);
|
|
out:
|
|
free_workspace(workspace);
|
|
return ret;
|
|
}
|
|
|
|
/* Tear-down hook: release every idle zlib workspace via free_workspaces(). */
void btrfs_zlib_exit(void)
{
	free_workspaces();
}
|