This contains two important JFFS2 fixes marked for stable:

  • a lock ordering problem between the page lock and the internal f->sem
    mutex, which was causing occasional deadlocks in garbage collection, and
  • a scan failure causing moved directories to sometimes end up appearing
    to have hard links.
 
 There are also a couple of trivial MAINTAINERS file updates.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iEYEABECAAYFAlbaGIsACgkQdwG7hYl686OpGQCgu0l4E7cQ/v1Af9kZatj6fnzN
 LvcAnR3SzmiH1jxNGSY7C1mUQWosRl/9
 =Ker9
 -----END PGP SIGNATURE-----

Merge tag 'for-linus-20160304' of git://git.infradead.org/linux-mtd

Pull jffs2 fixes from David Woodhouse:
 "This contains two important JFFS2 fixes marked for stable:

   - a lock ordering problem between the page lock and the internal
     f->sem mutex, which was causing occasional deadlocks in garbage
     collection

   - a scan failure causing moved directories to sometimes end up
     appearing to have hard links.

  There are also a couple of trivial MAINTAINERS file updates"

* tag 'for-linus-20160304' of git://git.infradead.org/linux-mtd:
  MAINTAINERS: add maintainer entry for FREESCALE GPMI NAND driver
  Fix directory hardlinks from deleted directories
  jffs2: Fix page lock / f->sem deadlock
  Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin"
  MAINTAINERS: update Han's email
This commit is contained in:
Linus Torvalds 2016-03-04 17:36:46 -08:00
commit c51797d25d
6 changed files with 99 additions and 53 deletions

View File

@ -4518,6 +4518,12 @@ L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: drivers/dma/fsldma.*
FREESCALE GPMI NAND DRIVER
M: Han Xu <han.xu@nxp.com>
L: linux-mtd@lists.infradead.org
S: Maintained
F: drivers/mtd/nand/gpmi-nand/*
FREESCALE I2C CPM DRIVER
M: Jochen Friedrich <jochen@scram.de>
L: linuxppc-dev@lists.ozlabs.org
@ -4534,7 +4540,7 @@ F: include/linux/platform_data/video-imxfb.h
F: drivers/video/fbdev/imxfb.c
FREESCALE QUAD SPI DRIVER
M: Han Xu <han.xu@freescale.com>
M: Han Xu <han.xu@nxp.com>
L: linux-mtd@lists.infradead.org
S: Maintained
F: drivers/mtd/spi-nor/fsl-quadspi.c

View File

@ -2,10 +2,6 @@
JFFS2 LOCKING DOCUMENTATION
---------------------------
At least theoretically, JFFS2 does not require the Big Kernel Lock
(BKL), which was always helpfully obtained for it by Linux 2.4 VFS
code. It has its own locking, as described below.
This document attempts to describe the existing locking rules for
JFFS2. It is not expected to remain perfectly up to date, but ought to
be fairly close.
@ -69,6 +65,7 @@ Ordering constraints:
any f->sem held.
2. Never attempt to lock two file mutexes in one thread.
No ordering rules have been made for doing so.
3. Never lock a page cache page with f->sem held.
erase_completion_lock spinlock

View File

@ -50,7 +50,8 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
struct jffs2_inode_cache *ic)
struct jffs2_inode_cache *ic,
int *dir_hardlinks)
{
struct jffs2_full_dirent *fd;
@ -69,19 +70,21 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n",
fd->name, fd->ino, ic->ino);
jffs2_mark_node_obsolete(c, fd->raw);
/* Clear the ic/raw union so it doesn't cause problems later. */
fd->ic = NULL;
continue;
}
/* From this point, fd->raw is no longer used so we can set fd->ic */
fd->ic = child_ic;
child_ic->pino_nlink++;
/* If we appear (at this stage) to have hard-linked directories,
* set a flag to trigger a scan later */
if (fd->type == DT_DIR) {
if (child_ic->pino_nlink) {
JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
fd->name, fd->ino, ic->ino);
/* TODO: What do we do about it? */
} else {
child_ic->pino_nlink = ic->ino;
}
} else
child_ic->pino_nlink++;
child_ic->flags |= INO_FLAGS_IS_DIR;
if (child_ic->pino_nlink > 1)
*dir_hardlinks = 1;
}
dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
/* Can't free scan_dents so far. We might need them in pass 2 */
@ -95,8 +98,7 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
*/
static int jffs2_build_filesystem(struct jffs2_sb_info *c)
{
int ret;
int i;
int ret, i, dir_hardlinks = 0;
struct jffs2_inode_cache *ic;
struct jffs2_full_dirent *fd;
struct jffs2_full_dirent *dead_fds = NULL;
@ -120,7 +122,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
/* Now scan the directory tree, increasing nlink according to every dirent found. */
for_each_inode(i, c, ic) {
if (ic->scan_dents) {
jffs2_build_inode_pass1(c, ic);
jffs2_build_inode_pass1(c, ic, &dir_hardlinks);
cond_resched();
}
}
@ -156,6 +158,20 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
}
dbg_fsbuild("pass 2a complete\n");
if (dir_hardlinks) {
/* If we detected directory hardlinks earlier, *hopefully*
* they are gone now because some of the links were from
* dead directories which still had some old dirents lying
* around and not yet garbage-collected, but which have
* been discarded above. So clear the pino_nlink field
* in each directory, so that the final scan below can
* print appropriate warnings. */
for_each_inode(i, c, ic) {
if (ic->flags & INO_FLAGS_IS_DIR)
ic->pino_nlink = 0;
}
}
dbg_fsbuild("freeing temporary data structures\n");
/* Finally, we can scan again and free the dirent structs */
@ -163,6 +179,33 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
while(ic->scan_dents) {
fd = ic->scan_dents;
ic->scan_dents = fd->next;
/* We do use the pino_nlink field to count nlink of
* directories during fs build, so set it to the
* parent ino# now. Now that there's hopefully only
* one. */
if (fd->type == DT_DIR) {
if (!fd->ic) {
/* We'll have complained about it and marked the corresponding
raw node obsolete already. Just skip it. */
continue;
}
/* We *have* to have set this in jffs2_build_inode_pass1() */
BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR));
/* We clear ic->pino_nlink for all directories' ic *only* if dir_hardlinks
* is set. Otherwise, we know this should never trigger anyway, so
* we don't do the check. And ic->pino_nlink still contains the nlink
* value (which is 1). */
if (dir_hardlinks && fd->ic->pino_nlink) {
JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n",
fd->name, fd->ino, ic->ino, fd->ic->pino_nlink);
/* Should we unlink it from its previous parent? */
}
/* For directories, ic->pino_nlink holds that parent inode # */
fd->ic->pino_nlink = ic->ino;
}
jffs2_free_full_dirent(fd);
}
ic->scan_dents = NULL;
@ -241,11 +284,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
/* Reduce nlink of the child. If it's now zero, stick it on the
dead_fds list to be cleaned up later. Else just free the fd */
if (fd->type == DT_DIR)
child_ic->pino_nlink = 0;
else
child_ic->pino_nlink--;
child_ic->pino_nlink--;
if (!child_ic->pino_nlink) {
dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",

View File

@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct page *pg;
struct inode *inode = mapping->host;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
struct jffs2_raw_inode ri;
uint32_t alloc_len = 0;
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
uint32_t pageofs = index << PAGE_CACHE_SHIFT;
int ret = 0;
pg = grab_cache_page_write_begin(mapping, index, flags);
if (!pg)
return -ENOMEM;
*pagep = pg;
jffs2_dbg(1, "%s()\n", __func__);
if (pageofs > inode->i_size) {
ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
if (ret)
return ret;
}
mutex_lock(&f->sem);
pg = grab_cache_page_write_begin(mapping, index, flags);
if (!pg) {
if (alloc_len)
jffs2_complete_reservation(c);
mutex_unlock(&f->sem);
return -ENOMEM;
}
*pagep = pg;
if (alloc_len) {
/* Make new hole frag from old EOF to new page */
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
struct jffs2_raw_inode ri;
struct jffs2_full_dnode *fn;
uint32_t alloc_len;
jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
(unsigned int)inode->i_size, pageofs);
ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
if (ret)
goto out_page;
mutex_lock(&f->sem);
memset(&ri, 0, sizeof(ri));
ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
if (IS_ERR(fn)) {
ret = PTR_ERR(fn);
jffs2_complete_reservation(c);
mutex_unlock(&f->sem);
goto out_page;
}
ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
jffs2_mark_node_obsolete(c, fn->raw);
jffs2_free_full_dnode(fn);
jffs2_complete_reservation(c);
mutex_unlock(&f->sem);
goto out_page;
}
jffs2_complete_reservation(c);
inode->i_size = pageofs;
mutex_unlock(&f->sem);
}
/*
@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
* case of a short-copy.
*/
if (!PageUptodate(pg)) {
mutex_lock(&f->sem);
ret = jffs2_do_readpage_nolock(inode, pg);
mutex_unlock(&f->sem);
if (ret)
goto out_page;
}
mutex_unlock(&f->sem);
jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
return ret;
out_page:
unlock_page(pg);
page_cache_release(pg);
mutex_unlock(&f->sem);
return ret;
}

View File

@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
BUG_ON(start > orig_start);
}
/* First, use readpage() to read the appropriate page into the page cache */
/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
* triggered garbage collection in the first place?
* A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
* page OK. We'll actually write it out again in commit_write, which is a little
* suboptimal, but at least we're correct.
*/
/* The rules state that we must obtain the page lock *before* f->sem, so
* drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
* actually going to *change* so we're safe; we only allow reading.
*
* It is important to note that jffs2_write_begin() will ensure that its
* page is marked Uptodate before allocating space. That means that if we
* end up here trying to GC the *same* page that jffs2_write_begin() is
* trying to write out, read_cache_page() will not deadlock. */
mutex_unlock(&f->sem);
pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
mutex_lock(&f->sem);
if (IS_ERR(pg_ptr)) {
pr_warn("read_cache_page() returned error: %ld\n",

View File

@ -194,6 +194,7 @@ struct jffs2_inode_cache {
#define INO_STATE_CLEARING 6 /* In clear_inode() */
#define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */
#define INO_FLAGS_IS_DIR 0x02 /* is a directory */
#define RAWNODE_CLASS_INODE_CACHE 0
#define RAWNODE_CLASS_XATTR_DATUM 1
@ -249,7 +250,10 @@ struct jffs2_readinode_info
struct jffs2_full_dirent
{
struct jffs2_raw_node_ref *raw;
union {
struct jffs2_raw_node_ref *raw;
struct jffs2_inode_cache *ic; /* Just during part of build */
};
struct jffs2_full_dirent *next;
uint32_t version;
uint32_t ino; /* == zero for unlink */