ext4: Fix file fragmentation during large file write.
The range_cyclic writeback mode uses the address_space writeback_index as the start index for writeback. With delayed allocation we were updating writeback_index wrongly, resulting in a highly fragmented file. With this patch, the number of extents for a 3GB file is reduced from 4000 to 27. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
parent
17bc6c30cf
commit
22208dedbd
|
@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
|
||||||
int ret = 0, err, nr_pages, i;
|
int ret = 0, err, nr_pages, i;
|
||||||
unsigned long index, end;
|
unsigned long index, end;
|
||||||
struct pagevec pvec;
|
struct pagevec pvec;
|
||||||
|
long pages_skipped;
|
||||||
|
|
||||||
BUG_ON(mpd->next_page <= mpd->first_page);
|
BUG_ON(mpd->next_page <= mpd->first_page);
|
||||||
pagevec_init(&pvec, 0);
|
pagevec_init(&pvec, 0);
|
||||||
|
@ -1655,7 +1656,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
|
||||||
end = mpd->next_page - 1;
|
end = mpd->next_page - 1;
|
||||||
|
|
||||||
while (index <= end) {
|
while (index <= end) {
|
||||||
/* XXX: optimize tail */
|
|
||||||
/*
|
/*
|
||||||
* We can use PAGECACHE_TAG_DIRTY lookup here because
|
* We can use PAGECACHE_TAG_DIRTY lookup here because
|
||||||
* even though we have cleared the dirty flag on the page
|
* even though we have cleared the dirty flag on the page
|
||||||
|
@ -1673,8 +1673,13 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
|
||||||
for (i = 0; i < nr_pages; i++) {
|
for (i = 0; i < nr_pages; i++) {
|
||||||
struct page *page = pvec.pages[i];
|
struct page *page = pvec.pages[i];
|
||||||
|
|
||||||
|
pages_skipped = mpd->wbc->pages_skipped;
|
||||||
err = mapping->a_ops->writepage(page, mpd->wbc);
|
err = mapping->a_ops->writepage(page, mpd->wbc);
|
||||||
if (!err)
|
if (!err && (pages_skipped == mpd->wbc->pages_skipped))
|
||||||
|
/*
|
||||||
|
* have successfully written the page
|
||||||
|
* without skipping the same
|
||||||
|
*/
|
||||||
mpd->pages_written++;
|
mpd->pages_written++;
|
||||||
/*
|
/*
|
||||||
* In error case, we have to continue because
|
* In error case, we have to continue because
|
||||||
|
@ -2110,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
|
||||||
struct writeback_control *wbc,
|
struct writeback_control *wbc,
|
||||||
struct mpage_da_data *mpd)
|
struct mpage_da_data *mpd)
|
||||||
{
|
{
|
||||||
long to_write;
|
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (!mpd->get_block)
|
if (!mpd->get_block)
|
||||||
|
@ -2125,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
|
||||||
mpd->pages_written = 0;
|
mpd->pages_written = 0;
|
||||||
mpd->retval = 0;
|
mpd->retval = 0;
|
||||||
|
|
||||||
to_write = wbc->nr_to_write;
|
|
||||||
|
|
||||||
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
|
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Handle last extent of pages
|
* Handle last extent of pages
|
||||||
*/
|
*/
|
||||||
if (!mpd->io_done && mpd->next_page != mpd->first_page) {
|
if (!mpd->io_done && mpd->next_page != mpd->first_page) {
|
||||||
if (mpage_da_map_blocks(mpd) == 0)
|
if (mpage_da_map_blocks(mpd) == 0)
|
||||||
mpage_da_submit_io(mpd);
|
mpage_da_submit_io(mpd);
|
||||||
}
|
|
||||||
|
|
||||||
wbc->nr_to_write = to_write - mpd->pages_written;
|
mpd->io_done = 1;
|
||||||
|
ret = MPAGE_DA_EXTENT_TAIL;
|
||||||
|
}
|
||||||
|
wbc->nr_to_write -= mpd->pages_written;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2366,11 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
|
||||||
static int ext4_da_writepages(struct address_space *mapping,
|
static int ext4_da_writepages(struct address_space *mapping,
|
||||||
struct writeback_control *wbc)
|
struct writeback_control *wbc)
|
||||||
{
|
{
|
||||||
|
pgoff_t index;
|
||||||
|
int range_whole = 0;
|
||||||
handle_t *handle = NULL;
|
handle_t *handle = NULL;
|
||||||
struct mpage_da_data mpd;
|
struct mpage_da_data mpd;
|
||||||
struct inode *inode = mapping->host;
|
struct inode *inode = mapping->host;
|
||||||
|
int no_nrwrite_index_update;
|
||||||
|
long pages_written = 0, pages_skipped;
|
||||||
int needed_blocks, ret = 0, nr_to_writebump = 0;
|
int needed_blocks, ret = 0, nr_to_writebump = 0;
|
||||||
long to_write, pages_skipped = 0;
|
|
||||||
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
|
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2390,16 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
|
||||||
nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
|
nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
|
||||||
wbc->nr_to_write = sbi->s_mb_stream_request;
|
wbc->nr_to_write = sbi->s_mb_stream_request;
|
||||||
}
|
}
|
||||||
|
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
|
||||||
|
range_whole = 1;
|
||||||
|
|
||||||
|
if (wbc->range_cyclic)
|
||||||
pages_skipped = wbc->pages_skipped;
|
index = mapping->writeback_index;
|
||||||
|
else
|
||||||
|
index = wbc->range_start >> PAGE_CACHE_SHIFT;
|
||||||
|
|
||||||
mpd.wbc = wbc;
|
mpd.wbc = wbc;
|
||||||
mpd.inode = mapping->host;
|
mpd.inode = mapping->host;
|
||||||
|
|
||||||
restart_loop:
|
/*
|
||||||
to_write = wbc->nr_to_write;
|
* we don't want write_cache_pages to update
|
||||||
while (!ret && to_write > 0) {
|
* nr_to_write and writeback_index
|
||||||
|
*/
|
||||||
|
no_nrwrite_index_update = wbc->no_nrwrite_index_update;
|
||||||
|
wbc->no_nrwrite_index_update = 1;
|
||||||
|
pages_skipped = wbc->pages_skipped;
|
||||||
|
|
||||||
|
while (!ret && wbc->nr_to_write > 0) {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* we insert one extent at a time. So we need
|
* we insert one extent at a time. So we need
|
||||||
|
@ -2420,46 +2436,53 @@ restart_loop:
|
||||||
dump_stack();
|
dump_stack();
|
||||||
goto out_writepages;
|
goto out_writepages;
|
||||||
}
|
}
|
||||||
to_write -= wbc->nr_to_write;
|
|
||||||
|
|
||||||
mpd.get_block = ext4_da_get_block_write;
|
mpd.get_block = ext4_da_get_block_write;
|
||||||
ret = mpage_da_writepages(mapping, wbc, &mpd);
|
ret = mpage_da_writepages(mapping, wbc, &mpd);
|
||||||
|
|
||||||
ext4_journal_stop(handle);
|
ext4_journal_stop(handle);
|
||||||
|
|
||||||
if (mpd.retval == -ENOSPC)
|
if (mpd.retval == -ENOSPC) {
|
||||||
|
/* commit the transaction which would
|
||||||
|
* free blocks released in the transaction
|
||||||
|
* and try again
|
||||||
|
*/
|
||||||
jbd2_journal_force_commit_nested(sbi->s_journal);
|
jbd2_journal_force_commit_nested(sbi->s_journal);
|
||||||
|
wbc->pages_skipped = pages_skipped;
|
||||||
/* reset the retry count */
|
ret = 0;
|
||||||
if (ret == MPAGE_DA_EXTENT_TAIL) {
|
} else if (ret == MPAGE_DA_EXTENT_TAIL) {
|
||||||
/*
|
/*
|
||||||
* got one extent now try with
|
* got one extent now try with
|
||||||
* rest of the pages
|
* rest of the pages
|
||||||
*/
|
*/
|
||||||
to_write += wbc->nr_to_write;
|
pages_written += mpd.pages_written;
|
||||||
|
wbc->pages_skipped = pages_skipped;
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else if (wbc->nr_to_write) {
|
} else if (wbc->nr_to_write)
|
||||||
/*
|
/*
|
||||||
* There is no more writeout needed
|
* There is no more writeout needed
|
||||||
* or we requested for a noblocking writeout
|
* or we requested for a noblocking writeout
|
||||||
* and we found the device congested
|
* and we found the device congested
|
||||||
*/
|
*/
|
||||||
to_write += wbc->nr_to_write;
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
wbc->nr_to_write = to_write;
|
|
||||||
}
|
}
|
||||||
|
if (pages_skipped != wbc->pages_skipped)
|
||||||
|
printk(KERN_EMERG "This should not happen leaving %s "
|
||||||
|
"with nr_to_write = %ld ret = %d\n",
|
||||||
|
__func__, wbc->nr_to_write, ret);
|
||||||
|
|
||||||
if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
|
/* Update index */
|
||||||
/* We skipped pages in this loop */
|
index += pages_written;
|
||||||
wbc->nr_to_write = to_write +
|
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
|
||||||
wbc->pages_skipped - pages_skipped;
|
/*
|
||||||
wbc->pages_skipped = pages_skipped;
|
* set the writeback_index so that range_cyclic
|
||||||
goto restart_loop;
|
* mode will write it back later
|
||||||
}
|
*/
|
||||||
|
mapping->writeback_index = index;
|
||||||
|
|
||||||
out_writepages:
|
out_writepages:
|
||||||
wbc->nr_to_write = to_write - nr_to_writebump;
|
if (!no_nrwrite_index_update)
|
||||||
|
wbc->no_nrwrite_index_update = 0;
|
||||||
|
wbc->nr_to_write -= nr_to_writebump;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue