Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

37
fs/ubifs/Kconfig Normal file
View file

@ -0,0 +1,37 @@
# Main UBIFS switch. CRC16 and CRC32 are always selected; the crypto core and
# the individual compressors are selected only when the matching compression
# option below is enabled.
config UBIFS_FS
tristate "UBIFS file system support"
select CRC16
select CRC32
select CRYPTO if UBIFS_FS_ADVANCED_COMPR
select CRYPTO if UBIFS_FS_LZO
select CRYPTO if UBIFS_FS_ZLIB
select CRYPTO_LZO if UBIFS_FS_LZO
select CRYPTO_DEFLATE if UBIFS_FS_ZLIB
depends on MTD_UBI
help
UBIFS is a file system for flash devices which works on top of UBI.
# Gates the prompts of the per-compressor options below.
config UBIFS_FS_ADVANCED_COMPR
bool "Advanced compression options"
depends on UBIFS_FS
help
This option allows to explicitly choose which compressions, if any,
are enabled in UBIFS. Removing compressors means inability to read
existing file systems.
If unsure, say 'N'.
# Defaults to 'y'; the prompt is only offered when UBIFS_FS_ADVANCED_COMPR is
# set.
config UBIFS_FS_LZO
bool "LZO compression support" if UBIFS_FS_ADVANCED_COMPR
depends on UBIFS_FS
default y
help
LZO compressor is generally faster than zlib but compresses worse.
Say 'Y' if unsure.
# Defaults to 'y'; the prompt is only offered when UBIFS_FS_ADVANCED_COMPR is
# set.
config UBIFS_FS_ZLIB
bool "ZLIB compression support" if UBIFS_FS_ADVANCED_COMPR
depends on UBIFS_FS
default y
help
Zlib compresses better than LZO but it is slower. Say 'Y' if unsure.

6
fs/ubifs/Makefile Normal file
View file

@ -0,0 +1,6 @@
# Build ubifs.o when CONFIG_UBIFS_FS is enabled (built-in or module).
obj-$(CONFIG_UBIFS_FS) += ubifs.o
# Objects that are linked together into ubifs.o.
ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o
ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o
ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o
ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o xattr.o debug.o

730
fs/ubifs/budget.c Normal file
View file

@ -0,0 +1,730 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Adrian Hunter
* Artem Bityutskiy (Битюцкий Артём)
*/
/*
* This file implements the budgeting sub-system which is responsible for UBIFS
* space management.
*
* Factors such as compression, wasted space at the ends of LEBs, space in other
* journal heads, the effect of updates on the index, and so on, make it
* impossible to accurately predict the amount of space needed. Consequently
* approximations are used.
*/
#include "ubifs.h"
#include <linux/writeback.h>
#include <linux/math64.h>
/*
 * When the pessimistic budget calculations say that there is not enough
 * space, UBIFS starts writing back dirty inodes and pages, doing garbage
 * collection, or committing. The constant below bounds how many times
 * 'make_free_space()' repeats that sequence after the first attempt, so the
 * sequence runs at most MAX_MKSPC_RETRIES + 1 times.
 */
#define MAX_MKSPC_RETRIES 3
/*
 * The constant below is the number of dirty pages 'make_free_space()' asks
 * 'shrink_liability()' to write back when trying to shrink the liability.
 */
#define NR_TO_WRITE 16
/**
 * shrink_liability - write-back some dirty pages/inodes.
 * @c: UBIFS file-system description object
 * @nr_to_write: how many dirty pages to write-back
 *
 * This function shrinks UBIFS liability by asking the VFS to write back up to
 * @nr_to_write dirty pages belonging to this file-system. Writing cached data
 * out releases the (pessimistic) budget that was reserved for it.
 *
 * Note, this function synchronizes even VFS inodes which are locked
 * (@i_mutex) by the caller of the budgeting function, because write-back does
 * not touch @i_mutex.
 */
static void shrink_liability(struct ubifs_info *c, int nr_to_write)
{
	/* @s_umount is held for reading for the duration of the request */
	down_read(&c->vfs_sb->s_umount);
	/*
	 * Honour @nr_to_write instead of flushing every dirty inode of the
	 * super block: the caller re-checks the liability afterwards and
	 * comes back here if more space is still needed.
	 */
	writeback_inodes_sb_nr(c->vfs_sb, nr_to_write, WB_REASON_FS_FREE_SPACE);
	up_read(&c->vfs_sb->s_umount);
}
/**
 * run_gc - run garbage collector.
 * @c: UBIFS file-system description object
 *
 * Runs one garbage-collection pass with @c->commit_sem held for reading and,
 * if it produced a free LEB, hands that LEB back to lprops. Returns zero if a
 * free LEB has been produced, %-EAGAIN if commit is required, and a negative
 * error code in case of failure.
 */
static int run_gc(struct ubifs_info *c)
{
	int freed_lnum;

	/* Turn dirty space into free space */
	down_read(&c->commit_sem);
	freed_lnum = ubifs_garbage_collect(c, 1);
	up_read(&c->commit_sem);

	/* A negative value is an error code, not a LEB number */
	if (freed_lnum < 0)
		return freed_lnum;

	/* GC produced a free LEB, give it back to lprops */
	dbg_budg("GC freed LEB %d", freed_lnum);
	return ubifs_return_leb(c, freed_lnum);
}
/**
 * get_liability - calculate current liability.
 * @c: UBIFS file-system description object
 *
 * Returns the current UBIFS liability, i.e. the number of bytes UBIFS has
 * "promised" to write to the media: the sum of the index, data and dirtied
 * data growth counters, sampled under @c->space_lock.
 */
static long long get_liability(struct ubifs_info *c)
{
	long long promised;

	/* Take a consistent snapshot of all three counters */
	spin_lock(&c->space_lock);
	promised = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
	spin_unlock(&c->space_lock);

	return promised;
}
/**
 * make_free_space - make more free space on the file-system.
 * @c: UBIFS file-system description object
 *
 * This function is called when an operation cannot be budgeted because there
 * is supposedly no free space. But in most cases there is some free space:
 *   o budgeting is pessimistic, so it always budgets more than is actually
 *     needed, so shrinking the liability is one way to make free space - the
 *     cached data will take less space than it was budgeted for;
 *   o GC may turn some dark space into free space (budgeting treats dark space
 *     as not available);
 *   o commit may free some LEB, i.e., turn freeable LEBs into free LEBs.
 *
 * Each pass tries write-back, then GC, then a commit; the pass is run at most
 * %MAX_MKSPC_RETRIES + 1 times. Returns %-EAGAIN if some free space was
 * presumably made and the caller has to re-try budgeting the operation.
 * Returns %-ENOSPC if it couldn't make more free space, and other negative
 * error codes on failures.
 */
static int make_free_space(struct ubifs_info *c)
{
	int attempt, err;

	for (attempt = 0; attempt <= MAX_MKSPC_RETRIES; attempt++) {
		long long before, after;

		/*
		 * We probably have some dirty pages or inodes (liability),
		 * try to write them back and see whether that helped.
		 */
		before = get_liability(c);
		dbg_budg("liability %lld, run write-back", before);
		shrink_liability(c, NR_TO_WRITE);

		after = get_liability(c);
		if (after < before)
			return -EAGAIN;
		dbg_budg("new liability %lld (not shrunk)", after);

		/* Write-back did not shrink the liability, try GC */
		dbg_budg("Run GC");
		err = run_gc(c);
		if (!err)
			return -EAGAIN;
		/* Anything but "commit needed"/"no space" is a real error */
		if (err != -EAGAIN && err != -ENOSPC)
			return err;

		/* Last resort for this pass: commit */
		dbg_budg("Run commit (retries %d)", attempt);
		err = ubifs_run_commit(c);
		if (err)
			return err;
	}

	return -ENOSPC;
}
/**
 * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index.
 * @c: UBIFS file-system description object
 *
 * Returns the number of LEBs which should be kept for index usage: three
 * times the current index size (committed, budgeted and uncommitted), rounded
 * up to whole index LEBs, plus one extra LEB, and never less than
 * %MIN_INDEX_LEBS.
 */
int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
{
	long long idx_bytes;
	int lebs;

	idx_bytes = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;

	/* Reserve thrice the index size */
	idx_bytes *= 3;

	/*
	 * The index size is not tracked as a LEBs/bytes pair, so this costly
	 * 64-bit division (rounding up) has to be done on the fast path.
	 */
	lebs = div_u64(idx_bytes + c->idx_leb_size - 1, c->idx_leb_size);

	/*
	 * The index head is not available for the in-the-gaps method, so add
	 * an extra LEB to compensate.
	 */
	lebs += 1;

	return lebs < MIN_INDEX_LEBS ? MIN_INDEX_LEBS : lebs;
}
/**
 * ubifs_calc_available - calculate available FS space.
 * @c: UBIFS file-system description object
 * @min_idx_lebs: minimum number of LEBs reserved for the index
 *
 * This function calculates and returns amount of FS space available for use.
 * The result is a rough, pessimistic estimate and is clamped to zero when the
 * subtractions below would make it negative.
 */
long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
{
	int subtract_lebs;
	long long available;

	available = c->main_bytes - c->lst.total_used;

	/*
	 * Now 'available' contains theoretically available flash space
	 * assuming there is no index, so we have to subtract the space which
	 * is reserved for the index.
	 */
	subtract_lebs = min_idx_lebs;

	/* Take into account that GC reserves one LEB for its own needs */
	subtract_lebs += 1;

	/*
	 * The GC journal head LEB is not really accessible. And since
	 * different write types go to different heads, we may count only on
	 * one head's space.
	 */
	subtract_lebs += c->jhead_cnt - 1;

	/* We also reserve one LEB for deletions, which bypass budgeting */
	subtract_lebs += 1;

	available -= (long long)subtract_lebs * c->leb_size;

	/* Subtract the dead space which is not available for use */
	available -= c->lst.total_dead;

	/*
	 * Subtract dark space, which might or might not be usable - it depends
	 * on the data which we have on the media and which will be written. If
	 * this is a lot of uncompressed or not-compressible data, the dark
	 * space cannot be used.
	 */
	available -= c->lst.total_dark;

	/*
	 * However, there is more dark space. The index may be bigger than
	 * @min_idx_lebs. Those extra LEBs are assumed to be available, but
	 * their dark space is not included in total_dark, so it is subtracted
	 * here. The product is computed in 64 bits, like the LEB-size product
	 * above, so that it cannot overflow a 32-bit multiplication.
	 */
	if (c->lst.idx_lebs > min_idx_lebs) {
		subtract_lebs = c->lst.idx_lebs - min_idx_lebs;
		available -= (long long)subtract_lebs * c->dark_wm;
	}

	/* The calculations are rough and may end up with a negative number */
	return available > 0 ? available : 0;
}
/**
 * can_use_rp - check whether the user is allowed to use reserved pool.
 * @c: UBIFS file-system description object
 *
 * UBIFS has a so-called "reserved pool", which is flash space reserved for the
 * superuser and for users whose UID/GID is recorded in the UBIFS superblock.
 * This function checks whether the current user is allowed to use the
 * reserved pool. Returns %1 if the current user is allowed to use the
 * reserved pool and %0 otherwise.
 */
static int can_use_rp(struct ubifs_info *c)
{
	/* The file-system UID of the caller matches the recorded UID */
	if (uid_eq(current_fsuid(), c->rp_uid))
		return 1;

	/* The caller has the CAP_SYS_RESOURCE capability */
	if (capable(CAP_SYS_RESOURCE))
		return 1;

	/* A non-root reserved-pool GID is set and the caller is a member */
	if (!gid_eq(c->rp_gid, GLOBAL_ROOT_GID) && in_group_p(c->rp_gid))
		return 1;

	return 0;
}
/**
 * do_budget_space - reserve flash space for index and data growth.
 * @c: UBIFS file-system description object
 *
 * This function makes sure UBIFS has enough free LEBs for index growth and
 * data.
 *
 * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
 * would take if it was consolidated and written to the flash. This guarantees
 * that the "in-the-gaps" commit method always succeeds and UBIFS will always
 * be able to commit dirty index. So this function basically adds the amount
 * of budgeted index space to the size of the current index, multiplies this
 * by 3, and makes sure this does not exceed the amount of free LEBs.
 *
 * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
 *   o @c->lst.idx_lebs is the number of LEBs the index currently uses. It
 *     might be large, because UBIFS does not do any index consolidation as
 *     long as there is free space. IOW, the index may take a lot of LEBs, but
 *     the LEBs will contain a lot of dirt.
 *   o @c->bi.min_idx_lebs is the number of LEBs the index presumably takes.
 *     IOW, the index may be consolidated to take up to @c->bi.min_idx_lebs
 *     LEBs.
 *
 * On success @c->bi.min_idx_lebs is updated. This function returns zero in
 * case of success, and %-ENOSPC in case of failure.
 */
static int do_budget_space(struct ubifs_info *c)
{
	int min_idx_lebs, rsvd_idx_lebs, usable_lebs;
	long long avail, owed;

	/* First budget index space: how many LEBs must be reserved for it */
	min_idx_lebs = ubifs_calc_min_idx_lebs(c);
	rsvd_idx_lebs = min_idx_lebs > c->lst.idx_lebs ?
			min_idx_lebs - c->lst.idx_lebs : 0;

	/*
	 * The number of LEBs that are available to be used by the index is:
	 *
	 *    @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
	 *    @c->lst.taken_empty_lebs
	 *
	 * @c->lst.empty_lebs are available because they are empty.
	 * @c->freeable_cnt are available because they contain only free and
	 * dirty space, @c->idx_gc_cnt are available because they are index
	 * LEBs that have been garbage collected and are awaiting the commit
	 * before they can be used. And the in-the-gaps method will grab these
	 * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have
	 * already been allocated for some purpose.
	 *
	 * Note, @c->idx_gc_cnt is included in both @c->lst.empty_lebs (because
	 * these LEBs are empty) and @c->lst.taken_empty_lebs (because they
	 * are taken until after the commit).
	 *
	 * Note, @c->lst.taken_empty_lebs may temporarily be higher by one
	 * because of the way we serialize LEB allocations and budgeting. See a
	 * comment in 'ubifs_find_free_space()'.
	 */
	usable_lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
		      c->lst.taken_empty_lebs;
	if (unlikely(rsvd_idx_lebs > usable_lebs)) {
		dbg_budg("out of indexing space: min_idx_lebs %d (old %d), rsvd_idx_lebs %d",
			 min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs);
		return -ENOSPC;
	}

	/* Then check that the promised data still fits */
	avail = ubifs_calc_available(c, min_idx_lebs);
	owed = c->bi.data_growth + c->bi.dd_growth;
	if (unlikely(avail < owed)) {
		dbg_budg("out of data space: available %lld, outstanding %lld",
			 avail, owed);
		return -ENOSPC;
	}

	/* What is left belongs to the reserved pool */
	if (avail - owed <= c->rp_size && !can_use_rp(c))
		return -ENOSPC;

	c->bi.min_idx_lebs = min_idx_lebs;
	return 0;
}
/**
 * calc_idx_growth - calculate approximate index growth from budgeting request.
 * @c: UBIFS file-system description object
 * @req: budgeting request
 *
 * For now it is assumed that every new node adds one znode, which is a rather
 * poor approximation. Returns the estimated index growth in bytes.
 */
static int calc_idx_growth(const struct ubifs_info *c,
			   const struct ubifs_budget_req *req)
{
	int nodes;

	/* One node per new inode, per block of a new page, per new dentry */
	nodes = req->new_ino;
	nodes += req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT;
	nodes += req->new_dent;

	return nodes * c->max_idx_node_sz;
}
/**
 * calc_data_growth - calculate approximate amount of new data from budgeting
 *                    request.
 * @c: UBIFS file-system description object
 * @req: budgeting request
 *
 * Returns the sum of the inode, page and directory entry budgets for every
 * kind of new object the request asks for, plus the new inode data length.
 */
static int calc_data_growth(const struct ubifs_info *c,
			    const struct ubifs_budget_req *req)
{
	int growth = 0;

	if (req->new_ino)
		growth += c->bi.inode_budget;
	if (req->new_page)
		growth += c->bi.page_budget;
	if (req->new_dent)
		growth += c->bi.dent_budget;
	growth += req->new_ino_d;

	return growth;
}
/**
 * calc_dd_growth - calculate approximate amount of data which makes other data
 *                  dirty from budgeting request.
 * @c: UBIFS file-system description object
 * @req: budgeting request
 *
 * Returns the budget needed for the objects the request is going to dirty:
 * one page budget if a page is dirtied, one inode budget per dirtied inode,
 * one directory entry budget if a directory entry is modified, plus the
 * dirtied inode data length.
 */
static int calc_dd_growth(const struct ubifs_info *c,
			  const struct ubifs_budget_req *req)
{
	int dd_growth;

	dd_growth = req->dirtied_page ? c->bi.page_budget : 0;

	/*
	 * Every dirtied inode needs exactly one inode budget. Scaling the
	 * budget linearly (rather than doubling it for each additional inode)
	 * keeps the reservation equal to what will actually be written.
	 */
	if (req->dirtied_ino)
		dd_growth += c->bi.inode_budget * req->dirtied_ino;
	if (req->mod_dent)
		dd_growth += c->bi.dent_budget;
	dd_growth += req->dirtied_ino_d;

	return dd_growth;
}
/**
 * ubifs_budget_space - ensure there is enough space to complete an operation.
 * @c: UBIFS file-system description object
 * @req: budget request
 *
 * This function allocates budget for an operation. It uses a pessimistic
 * approximation of how much flash space the operation needs. The goal of this
 * function is to make sure UBIFS always has flash space to flush all dirty
 * pages, dirty inodes, and dirty znodes (liability). This function may force
 * commit, garbage-collection or write-back, unless @req->fast is set, in
 * which case it fails immediately when the budget cannot be satisfied.
 * Returns zero in case of success, %-ENOSPC if there is no free space and
 * other negative error codes in case of failures.
 */
int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
{
	int idx_growth, data_growth, dd_growth;
	int err, retried = 0;

	ubifs_assert(req->new_page <= 1);
	ubifs_assert(req->dirtied_page <= 1);
	ubifs_assert(req->new_dent <= 1);
	ubifs_assert(req->mod_dent <= 1);
	ubifs_assert(req->new_ino <= 1);
	ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
	ubifs_assert(req->dirtied_ino <= 4);
	ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
	ubifs_assert(!(req->new_ino_d & 7));
	ubifs_assert(!(req->dirtied_ino_d & 7));

	data_growth = calc_data_growth(c, req);
	dd_growth = calc_dd_growth(c, req);
	/* Nothing to budget for */
	if (!data_growth && !dd_growth)
		return 0;
	idx_growth = calc_idx_growth(c, req);

	for (;;) {
		spin_lock(&c->space_lock);
		ubifs_assert(c->bi.idx_growth >= 0);
		ubifs_assert(c->bi.data_growth >= 0);
		ubifs_assert(c->bi.dd_growth >= 0);

		/* Fail fast if the file-system is already known to be full */
		if (unlikely(c->bi.nospace) &&
		    (c->bi.nospace_rp || !can_use_rp(c))) {
			dbg_budg("no space");
			spin_unlock(&c->space_lock);
			return -ENOSPC;
		}

		/* Tentatively account the request and check that it fits */
		c->bi.idx_growth += idx_growth;
		c->bi.data_growth += data_growth;
		c->bi.dd_growth += dd_growth;

		err = do_budget_space(c);
		if (likely(!err)) {
			req->idx_growth = idx_growth;
			req->data_growth = data_growth;
			req->dd_growth = dd_growth;
			spin_unlock(&c->space_lock);
			return 0;
		}

		/* It does not fit, restore the old values */
		c->bi.idx_growth -= idx_growth;
		c->bi.data_growth -= data_growth;
		c->bi.dd_growth -= dd_growth;
		spin_unlock(&c->space_lock);

		if (req->fast) {
			dbg_budg("no space for fast budgeting");
			return err;
		}

		err = make_free_space(c);
		cond_resched();
		if (err == -EAGAIN) {
			dbg_budg("try again");
			continue;
		}
		/* Give a first -ENOSPC one more chance */
		if (err == -ENOSPC && !retried) {
			retried = 1;
			dbg_budg("-ENOSPC, but anyway try once again");
			continue;
		}
		break;
	}

	if (err != -ENOSPC) {
		ubifs_err("cannot budget space, error %d", err);
		return err;
	}

	/* Remember that the file-system is full to speed up later requests */
	dbg_budg("FS is full, -ENOSPC");
	c->bi.nospace = 1;
	if (can_use_rp(c) || c->rp_size == 0)
		c->bi.nospace_rp = 1;
	smp_wmb();
	return err;
}
/**
 * ubifs_release_budget - release budgeted free space.
 * @c: UBIFS file-system description object
 * @req: budget request
 *
 * This function releases the space budgeted by 'ubifs_budget_space()'. If
 * @req->recalculate is set, the amounts to release are re-computed from the
 * request instead of being taken from the values stored in it. Note, since
 * the index changes (which were budgeted for in @req->idx_growth) will only
 * be written to the media on commit, this function moves the index budget
 * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
 * by the commit operation. The "no space" flags are cleared as well.
 */
void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
{
	ubifs_assert(req->new_page <= 1);
	ubifs_assert(req->dirtied_page <= 1);
	ubifs_assert(req->new_dent <= 1);
	ubifs_assert(req->mod_dent <= 1);
	ubifs_assert(req->new_ino <= 1);
	ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
	ubifs_assert(req->dirtied_ino <= 4);
	ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
	ubifs_assert(!(req->new_ino_d & 7));
	ubifs_assert(!(req->dirtied_ino_d & 7));

	if (req->recalculate) {
		/* Do not trust the stored amounts, derive them again */
		req->data_growth = calc_data_growth(c, req);
		req->dd_growth = calc_dd_growth(c, req);
		req->idx_growth = calc_idx_growth(c, req);
	} else {
		ubifs_assert(req->idx_growth >= 0);
		ubifs_assert(req->data_growth >= 0);
		ubifs_assert(req->dd_growth >= 0);
	}

	/* Nothing was budgeted, so there is nothing to give back */
	if (!req->data_growth && !req->dd_growth)
		return;

	/* Space is about to be released, so "no space" no longer holds */
	c->bi.nospace_rp = 0;
	c->bi.nospace = 0;
	smp_wmb();

	spin_lock(&c->space_lock);
	c->bi.idx_growth -= req->idx_growth;
	c->bi.uncommitted_idx += req->idx_growth;
	c->bi.data_growth -= req->data_growth;
	c->bi.dd_growth -= req->dd_growth;
	c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);

	ubifs_assert(c->bi.idx_growth >= 0);
	ubifs_assert(c->bi.data_growth >= 0);
	ubifs_assert(c->bi.dd_growth >= 0);
	ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
	ubifs_assert(!(c->bi.idx_growth & 7));
	ubifs_assert(!(c->bi.data_growth & 7));
	ubifs_assert(!(c->bi.dd_growth & 7));
	spin_unlock(&c->space_lock);
}
/**
 * ubifs_convert_page_budget - convert budget of a new page.
 * @c: UBIFS file-system description object
 *
 * Converts the budget which was allocated for a new page of data into the
 * budget for changing an existing page of data. The latter is smaller than
 * the former, so only a simple re-calculation under @c->space_lock is needed
 * and no write-back is involved.
 */
void ubifs_convert_page_budget(struct ubifs_info *c)
{
	spin_lock(&c->space_lock);

	/* Move one page budget from "new data" to "dirtied data" */
	c->bi.data_growth -= c->bi.page_budget;
	c->bi.dd_growth += c->bi.page_budget;

	/* An existing page adds no index nodes, drop that reservation */
	c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;

	/* The index budget changed, so refresh the index LEB reservation */
	c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);

	spin_unlock(&c->space_lock);
}
/**
* ubifs_release_dirty_inode_budget - release dirty inode budget.
* @c: UBIFS file-system description object
* @ui: UBIFS inode to release the budget for
*
* This function releases budget corresponding to a dirty inode. It is usually
* called when after the inode has been written to the media and marked as
* clean. It also causes the "no space" flags to be cleared.
*/
void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
struct ubifs_inode *ui)
{
struct ubifs_budget_req req;
memset(&req, 0, sizeof(struct ubifs_budget_req));
/* The "no space" flags will be cleared because dd_growth is > 0 */
req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
ubifs_release_budget(c, &req);
}
/**
 * ubifs_reported_space - calculate reported free space.
 * @c: the UBIFS file-system description object
 * @free: amount of free space
 *
 * This function calculates the amount of free space which will be reported to
 * user-space. User-space applications tend to expect that if the file-system
 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
 * are able to write a file of size N. UBIFS attaches node headers to each data
 * node and it has to write indexing nodes as well. This introduces additional
 * overhead, and UBIFS has to report slightly less free space to meet the above
 * expectations.
 *
 * This function assumes free space is made up of uncompressed data nodes and
 * full index nodes (one per data node, tripled because we always allow enough
 * space to write the index thrice).
 *
 * Note, the calculation is pessimistic, which means that most of the time
 * UBIFS reports less space than it actually has.
 */
long long ubifs_reported_space(const struct ubifs_info *c, long long free)
{
	int eff_fanout, bytes_per_block;

	/*
	 * Reported space size is @free * X, where X is UBIFS block size
	 * divided by UBIFS block size + all overhead one data block
	 * introduces. The overhead is the node header + indexing overhead.
	 *
	 * Indexing overhead calculations are based on the following formula:
	 * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number
	 * of data nodes, f - fanout. Because the effective UBIFS fanout is
	 * half the maximum fanout, we assume that each data node introduces
	 * 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
	 * Note, the multiplier 3 is because UBIFS reserves three times as
	 * much space for the index.
	 */
	if (c->fanout > 3)
		eff_fanout = c->fanout >> 1;
	else
		eff_fanout = 2;

	/* Flash bytes consumed per UBIFS_BLOCK_SIZE bytes of user data */
	bytes_per_block = UBIFS_MAX_DATA_NODE_SZ;
	bytes_per_block += (c->max_idx_node_sz * 3) / (eff_fanout - 1);

	free *= UBIFS_BLOCK_SIZE;
	return div_u64(free, bytes_per_block);
}
/**
 * ubifs_get_free_space_nolock - return amount of free space.
 * @c: UBIFS file-system description object
 *
 * This function calculates amount of free space to report to user-space. It
 * does not take @c->space_lock itself; 'ubifs_get_free_space()' is the
 * wrapper which does.
 *
 * Because UBIFS may introduce substantial overhead (the index, node headers,
 * alignment, wastage at the end of LEBs, etc), it cannot report real amount of
 * free flash space it has (well, because not all dirty space is reclaimable,
 * UBIFS does not actually know the real amount). If UBIFS did so, it would
 * break user expectations about what free space is. Users seem to be
 * accustomed to assume that if the file-system reports N bytes of free space,
 * they would be able to fit a file of N bytes to the FS. This almost works for
 * traditional file-systems, because they have way less overhead than UBIFS.
 * So, to keep users happy, UBIFS tries to take the overhead into account.
 */
long long ubifs_get_free_space_nolock(struct ubifs_info *c)
{
	int rsvd_idx_lebs, lebs;
	long long available, outstanding, free;

	ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
	outstanding = c->bi.data_growth + c->bi.dd_growth;
	available = ubifs_calc_available(c, c->bi.min_idx_lebs);

	/*
	 * When reporting free space to user-space, UBIFS guarantees that it is
	 * possible to write a file of free space size. This means that for
	 * empty LEBs we may use more precise calculations than
	 * 'ubifs_calc_available()' is using. Namely, we know that in empty
	 * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
	 * Thus, amend the available space.
	 *
	 * Note, the calculations below are similar to what we have in
	 * 'do_budget_space()', so refer there for comments.
	 */
	if (c->bi.min_idx_lebs > c->lst.idx_lebs)
		rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
	else
		rsvd_idx_lebs = 0;
	lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
	       c->lst.taken_empty_lebs;
	lebs -= rsvd_idx_lebs;

	/* Do the multiplication in 64 bits so that it cannot overflow */
	available += (long long)lebs * (c->dark_wm - c->leb_overhead);

	if (available > outstanding)
		free = ubifs_reported_space(c, available - outstanding);
	else
		free = 0;
	return free;
}
/**
 * ubifs_get_free_space - return amount of free space.
 * @c: UBIFS file-system description object
 *
 * Locked wrapper around 'ubifs_get_free_space_nolock()': calculates and
 * returns the amount of free space to report to user-space while holding
 * @c->space_lock.
 */
long long ubifs_get_free_space(struct ubifs_info *c)
{
	long long reported;

	spin_lock(&c->space_lock);
	reported = ubifs_get_free_space_nolock(c);
	spin_unlock(&c->space_lock);

	return reported;
}

734
fs/ubifs/commit.c Normal file
View file

@ -0,0 +1,734 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Adrian Hunter
* Artem Bityutskiy (Битюцкий Артём)
*/
/*
* This file implements functions that manage the running of the commit process.
* Each affected module has its own functions to accomplish their part in the
* commit and those functions are called here.
*
* The commit is the process whereby all updates to the index and LEB properties
* are written out together and the journal becomes empty. This keeps the
* file system consistent - at all times the state can be recreated by reading
* the index and LEB properties and then replaying the journal.
*
* The commit is split into two parts named "commit start" and "commit end".
* During commit start, the commit process has exclusive access to the journal
* by holding the commit semaphore down for writing. As few I/O operations as
* possible are performed during commit start, instead the nodes that are to be
* written are merely identified. During commit end, the commit semaphore is no
* longer held and the journal is again in operation, allowing users to continue
* to use the file system while the bulk of the commit I/O is performed. The
* purpose of this two-step approach is to prevent the commit from causing any
* latency blips. Note that in any case, the commit does not prevent lookups
* (as permitted by the TNC mutex), or access to VFS data structures e.g. page
* cache.
*/
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include "ubifs.h"
/**
 * nothing_to_commit - check if there is nothing to commit.
 * @c: UBIFS file-system description object
 *
 * This is a helper function which checks if there is anything to commit. It is
 * used as an optimization to avoid starting the commit if it is not really
 * necessary. Indeed, the commit operation always assumes flash I/O (e.g.,
 * writing the commit start node to the log), and it is better to avoid doing
 * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is
 * nothing to commit, it is more optimal to avoid any flash I/O.
 *
 * This function has to be called with @c->commit_sem locked for writing -
 * this function does not take LPT/TNC locks because the @c->commit_sem
 * guarantees that we have exclusive access to the TNC and LPT data structures.
 *
 * This function returns %1 if there is nothing to commit and %0 otherwise.
 */
static int nothing_to_commit(struct ubifs_info *c)
{
	int tnc_dirty, lpt_dirty;

	/*
	 * During mounting or remounting from R/O mode to R/W mode we may
	 * commit for various recovery-related reasons.
	 */
	if (c->mounting || c->remounting_rw)
		return 0;

	/* A dirty TNC root means there is definitely something to commit */
	tnc_dirty = c->zroot.znode && ubifs_zn_dirty(c->zroot.znode);
	if (tnc_dirty)
		return 0;

	/*
	 * Even though the TNC is clean, the LPT tree may have dirty nodes. For
	 * example, this may happen if the budgeting subsystem invoked GC to
	 * make some free space, and the GC found an LEB with only dirty and
	 * free space. In this case GC would just change the lprops of this
	 * LEB (by turning all space into free space) and unmap it.
	 */
	lpt_dirty = c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags);
	if (lpt_dirty)
		return 0;

	/* Both roots are clean, so no dirty node may be left anywhere */
	ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
	ubifs_assert(c->dirty_pn_cnt == 0);
	ubifs_assert(c->dirty_nn_cnt == 0);

	return 1;
}
/**
 * do_commit - commit the journal.
 * @c: UBIFS file-system description object
 *
 * This function implements UBIFS commit. It has to be called with
 * @c->commit_sem locked for writing; the semaphore is released by this
 * function on every path (after the "commit start" phase on the normal path).
 *
 * On success, or when there is nothing to commit, the commit state is set to
 * %COMMIT_RESTING and waiters on @c->cmt_wq are woken up. On failure the
 * commit state is set to %COMMIT_BROKEN, waiters are woken up and
 * 'ubifs_ro_mode()' is called with the error.
 *
 * Returns zero in case of success and a negative error code in case of
 * failure (%-EROFS if @c->ro_error is already set).
 */
static int do_commit(struct ubifs_info *c)
{
int err, new_ltail_lnum, old_ltail_lnum, i;
struct ubifs_zbranch zroot;
struct ubifs_lp_stats lst;
dbg_cmt("start");
ubifs_assert(!c->ro_media && !c->ro_mount);
/* An earlier error was already recorded, refuse to commit */
if (c->ro_error) {
err = -EROFS;
goto out_up;
}
/* Nothing dirty: drop the semaphore and finish without any flash I/O */
if (nothing_to_commit(c)) {
up_write(&c->commit_sem);
err = 0;
goto out_cancel;
}
/* Sync all write buffers (necessary for recovery) */
for (i = 0; i < c->jhead_cnt; i++) {
err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
if (err)
goto out_up;
}
/*
 * "Commit start" phase: every sub-system's start-commit hook runs while
 * the commit semaphore is still held for writing. Any failure goes to
 * 'out_up', which releases the semaphore.
 */
c->cmt_no += 1;
err = ubifs_gc_start_commit(c);
if (err)
goto out_up;
err = dbg_check_lprops(c);
if (err)
goto out_up;
err = ubifs_log_start_commit(c, &new_ltail_lnum);
if (err)
goto out_up;
err = ubifs_tnc_start_commit(c, &zroot);
if (err)
goto out_up;
err = ubifs_lpt_start_commit(c);
if (err)
goto out_up;
err = ubifs_orphan_start_commit(c);
if (err)
goto out_up;
/* Snapshot the LEB properties statistics before dropping the semaphore */
ubifs_get_lp_stats(c, &lst);
up_write(&c->commit_sem);
/*
 * "Commit end" phase: runs without the commit semaphore, so failures from
 * here on go to 'out', which must not release it again.
 */
err = ubifs_tnc_end_commit(c);
if (err)
goto out;
err = ubifs_lpt_end_commit(c);
if (err)
goto out;
err = ubifs_orphan_end_commit(c);
if (err)
goto out;
err = dbg_check_old_index(c, &zroot);
if (err)
goto out;
/*
 * Fill in the in-memory master node (little-endian fields) with the new
 * commit number, log tail, index root, index head, LPT positions and the
 * LEB properties statistics sampled above.
 */
c->mst_node->cmt_no = cpu_to_le64(c->cmt_no);
c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
c->mst_node->root_offs = cpu_to_le32(zroot.offs);
c->mst_node->root_len = cpu_to_le32(zroot.len);
c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
c->mst_node->nhead_offs = cpu_to_le32(c->nhead_offs);
c->mst_node->ltab_lnum = cpu_to_le32(c->ltab_lnum);
c->mst_node->ltab_offs = cpu_to_le32(c->ltab_offs);
c->mst_node->lsave_lnum = cpu_to_le32(c->lsave_lnum);
c->mst_node->lsave_offs = cpu_to_le32(c->lsave_offs);
c->mst_node->lscan_lnum = cpu_to_le32(c->lscan_lnum);
c->mst_node->empty_lebs = cpu_to_le32(lst.empty_lebs);
c->mst_node->idx_lebs = cpu_to_le32(lst.idx_lebs);
c->mst_node->total_free = cpu_to_le64(lst.total_free);
c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty);
c->mst_node->total_used = cpu_to_le64(lst.total_used);
c->mst_node->total_dead = cpu_to_le64(lst.total_dead);
c->mst_node->total_dark = cpu_to_le64(lst.total_dark);
/* Mirror the "no orphans" state in the master node flags */
if (c->no_orphs)
c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
else
c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
/*
 * Remember the old log tail before ending the log commit, because it is
 * passed to the post-commit step below.
 */
old_ltail_lnum = c->ltail_lnum;
err = ubifs_log_end_commit(c, new_ltail_lnum);
if (err)
goto out;
err = ubifs_log_post_commit(c, old_ltail_lnum);
if (err)
goto out;
err = ubifs_gc_end_commit(c);
if (err)
goto out;
err = ubifs_lpt_post_commit(c);
if (err)
goto out;
/* Success (or nothing to commit): go back to rest and wake up waiters */
out_cancel:
spin_lock(&c->cs_lock);
c->cmt_state = COMMIT_RESTING;
wake_up(&c->cmt_wq);
dbg_cmt("commit end");
spin_unlock(&c->cs_lock);
return 0;
/* Failure while the commit semaphore is still held */
out_up:
up_write(&c->commit_sem);
/* Failure: mark the commit as broken, wake up waiters, flag the error */
out:
ubifs_err("commit failed, error %d", err);
spin_lock(&c->cs_lock);
c->cmt_state = COMMIT_BROKEN;
wake_up(&c->cmt_wq);
spin_unlock(&c->cs_lock);
ubifs_ro_mode(c, err);
return err;
}
/**
 * run_bg_commit - run background commit if it is needed.
 * @c: UBIFS file-system description object
 *
 * Runs the commit if the commit state says that a background commit was
 * requested or that a commit is required; otherwise does nothing. Returns
 * zero in case of success (including when no commit was needed) and a
 * negative error code in case of failure.
 */
static int run_bg_commit(struct ubifs_info *c)
{
	/*
	 * Run background commit only if background commit was requested or if
	 * commit is required.
	 */
	spin_lock(&c->cs_lock);
	if (c->cmt_state != COMMIT_BACKGROUND &&
	    c->cmt_state != COMMIT_REQUIRED) {
		spin_unlock(&c->cs_lock);
		return 0;
	}
	spin_unlock(&c->cs_lock);

	/*
	 * The state lock was dropped before taking the commit semaphore, so
	 * the state has to be checked again once both are held.
	 */
	down_write(&c->commit_sem);
	spin_lock(&c->cs_lock);
	switch (c->cmt_state) {
	case COMMIT_REQUIRED:
		c->cmt_state = COMMIT_RUNNING_REQUIRED;
		break;
	case COMMIT_BACKGROUND:
		c->cmt_state = COMMIT_RUNNING_BACKGROUND;
		break;
	default:
		/* The state changed in the meantime, nothing to do */
		up_write(&c->commit_sem);
		spin_unlock(&c->cs_lock);
		return 0;
	}
	spin_unlock(&c->cs_lock);

	/* 'do_commit()' releases the commit semaphore */
	return do_commit(c);
}
/**
* ubifs_bg_thread - UBIFS background thread function.
* @info: points to the file-system description object
*
* This function implements various file-system background activities:
* o when a write-buffer timer expires it synchronizes the appropriate
* write-buffer;
* o when the journal is about to be full, it starts in-advance commit.
*
* The loop runs until 'kthread_should_stop()' reports true and sleeps whenever
* @c->need_bgt is zero. This function always returns %0.
*
* Note, other stuff like background garbage collection may be added here in
* future.
*/
int ubifs_bg_thread(void *info)
{
int err;
struct ubifs_info *c = info;
ubifs_msg("background thread \"%s\" started, PID %d",
c->bgt_name, current->pid);
set_freezable();
while (1) {
if (kthread_should_stop())
break;
if (try_to_freeze())
continue;
/*
* The task state is changed before 'c->need_bgt' is tested -
* presumably so that a wake-up arriving between the test and
* 'schedule()' is not lost (NOTE(review): confirm against the waker).
*/
set_current_state(TASK_INTERRUPTIBLE);
/* Check if there is something to do */
if (!c->need_bgt) {
/*
* Without this re-check nothing would prevent us from
* going to sleep now, never being woken up, and blocking
* the task which could wait in 'kthread_stop()' forever.
*/
if (kthread_should_stop())
break;
schedule();
continue;
} else
__set_current_state(TASK_RUNNING);
c->need_bgt = 0;
/* A write-buffer synchronization failure switches UBIFS to R/O mode */
err = ubifs_bg_wbufs_sync(c);
if (err)
ubifs_ro_mode(c, err);
/* The return value of 'run_bg_commit()' is deliberately not checked here */
run_bg_commit(c);
cond_resched();
}
ubifs_msg("background thread \"%s\" stops", c->bgt_name);
return 0;
}
/**
 * ubifs_commit_required - set commit state to "required".
 * @c: UBIFS file-system description object
 *
 * Called when a commit is needed but the caller cannot run it itself, so the
 * need is only recorded in @c->cmt_state:
 *   o %COMMIT_RESTING and %COMMIT_BACKGROUND become %COMMIT_REQUIRED;
 *   o %COMMIT_RUNNING_BACKGROUND becomes %COMMIT_RUNNING_REQUIRED;
 *   o every other state is left untouched.
 */
void ubifs_commit_required(struct ubifs_info *c)
{
	int next;

	spin_lock(&c->cs_lock);

	if (c->cmt_state == COMMIT_RESTING ||
	    c->cmt_state == COMMIT_BACKGROUND)
		next = COMMIT_REQUIRED;
	else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
		next = COMMIT_RUNNING_REQUIRED;
	else
		/* Already required, running as required, or broken */
		next = c->cmt_state;

	if (next != c->cmt_state) {
		dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
			dbg_cstate(next));
		c->cmt_state = next;
	}

	spin_unlock(&c->cs_lock);
}
/**
 * ubifs_request_bg_commit - notify the background thread to do a commit.
 * @c: UBIFS file-system description object
 *
 * Called when the journal is full enough to make a commit worthwhile. If the
 * commit state is %COMMIT_RESTING it is changed to %COMMIT_BACKGROUND and the
 * background thread is woken up; in any other state nothing happens.
 */
void ubifs_request_bg_commit(struct ubifs_info *c)
{
	int kick = 0;

	spin_lock(&c->cs_lock);
	if (c->cmt_state == COMMIT_RESTING) {
		dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
			dbg_cstate(COMMIT_BACKGROUND));
		c->cmt_state = COMMIT_BACKGROUND;
		kick = 1;
	}
	spin_unlock(&c->cs_lock);

	/* Wake the thread only after 'c->cs_lock' has been released */
	if (kick)
		ubifs_wake_up_bgt(c);
}
/**
 * wait_for_commit - wait for commit.
 * @c: UBIFS file-system description object
 *
 * Sleeps on @c->cmt_wq until the commit state is neither
 * %COMMIT_RUNNING_BACKGROUND nor %COMMIT_RUNNING_REQUIRED. Always returns %0.
 */
static int wait_for_commit(struct ubifs_info *c)
{
	dbg_cmt("pid %d goes sleep", current->pid);

	/*
	 * We are woken when a commit ends. It is possible, although very
	 * unlikely, that by the time we look the next commit is already
	 * running, in which case we go back to sleep. That commit will wake
	 * us as well, so there is no danger of sleeping forever.
	 */
	wait_event(c->cmt_wq,
		   !(c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
		     c->cmt_state == COMMIT_RUNNING_REQUIRED));

	dbg_cmt("commit finished, pid %d woke up", current->pid);
	return 0;
}
/**
 * ubifs_run_commit - run or wait for commit.
 * @c: UBIFS file-system description object
 *
 * If a commit is already running, it is marked as "required" and this function
 * waits for it to finish. Otherwise the commit is run by the caller. Returns
 * zero in case of success and a negative error code in case of failure
 * (%-EROFS if the commit state is %COMMIT_BROKEN).
 */
int ubifs_run_commit(struct ubifs_info *c)
{
	spin_lock(&c->cs_lock);
	if (c->cmt_state == COMMIT_BROKEN) {
		spin_unlock(&c->cs_lock);
		return -EROFS;
	}
	if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
	    c->cmt_state == COMMIT_RUNNING_REQUIRED) {
		/*
		 * A commit is in flight. Flag it as 'running required' so that
		 * it completes as quickly as possible, then wait for it.
		 */
		c->cmt_state = COMMIT_RUNNING_REQUIRED;
		spin_unlock(&c->cs_lock);
		return wait_for_commit(c);
	}
	spin_unlock(&c->cs_lock);

	/* Ok, the commit is indeed needed */
	down_write(&c->commit_sem);
	spin_lock(&c->cs_lock);

	/*
	 * 'c->cs_lock' was dropped in between, so the state may have changed
	 * and has to be examined again.
	 */
	if (c->cmt_state == COMMIT_BROKEN) {
		up_write(&c->commit_sem);
		spin_unlock(&c->cs_lock);
		return -EROFS;
	}
	if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
	    c->cmt_state == COMMIT_RUNNING_REQUIRED) {
		c->cmt_state = COMMIT_RUNNING_REQUIRED;
		up_write(&c->commit_sem);
		spin_unlock(&c->cs_lock);
		return wait_for_commit(c);
	}

	c->cmt_state = COMMIT_RUNNING_REQUIRED;
	spin_unlock(&c->cs_lock);

	return do_commit(c);
}
/**
 * ubifs_gc_should_commit - determine if it is time for GC to run commit.
 * @c: UBIFS file-system description object
 *
 * This function is called by garbage collection to determine if commit should
 * be run. If the commit state is %COMMIT_BACKGROUND, which means that the
 * journal is full enough to start commit, the state is raised to
 * %COMMIT_REQUIRED. It is not absolutely necessary to commit yet, but it feels
 * like this should be better than to keep doing GC. Returns %1 if the commit
 * state is %COMMIT_REQUIRED on exit (GC has to initiate commit) and %0 if not.
 */
int ubifs_gc_should_commit(struct ubifs_info *c)
{
	int must_commit;

	spin_lock(&c->cs_lock);
	if (c->cmt_state != COMMIT_BACKGROUND) {
		dbg_cmt("commit not requested");
	} else {
		dbg_cmt("commit required now");
		c->cmt_state = COMMIT_REQUIRED;
	}
	must_commit = (c->cmt_state == COMMIT_REQUIRED);
	spin_unlock(&c->cs_lock);

	return must_commit;
}
/*
* Everything below is related to debugging.
*/
/**
* struct idx_node - holds one index node during index tree traversal.
* @list: links the nodes of the path currently being walked by
* 'dbg_check_old_index()'
* @iip: index in parent (slot number of this indexing node in the parent
* indexing node)
* @upper_key: all keys in this indexing node have to be less or equivalent to
* this key
* @idx: index node (8-byte aligned because all node structures must be 8-byte
* aligned)
*
* @idx is the last member: 'dbg_check_old_index()' allocates extra room after
* this structure so that an index node with @c->fanout branches fits.
*/
struct idx_node {
struct list_head list;
int iip;
union ubifs_key upper_key;
struct ubifs_idx_node idx __aligned(8);
};
/**
 * dbg_old_index_check_init - get information for the next old index check.
 * @c: UBIFS file-system description object
 * @zroot: root of the index
 *
 * Remembers @zroot in the debugging info and reads the root index node from
 * the media to record its level and sequence number. This is what the next
 * old index check, i.e. 'dbg_check_old_index()', compares against.
 *
 * Returns %0 on success and a negative error code on failure.
 */
int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
{
	struct ubifs_debug_info *d = c->dbg;
	struct ubifs_idx_node *idx;
	int err;

	/* The root branch is recorded even if reading the node fails below */
	d->old_zroot = *zroot;

	idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
	if (!idx)
		return -ENOMEM;

	err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, d->old_zroot.len,
			      d->old_zroot.lnum, d->old_zroot.offs);
	if (!err) {
		d->old_zroot_level = le16_to_cpu(idx->level);
		d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
	}

	kfree(idx);
	return err;
}
/**
* dbg_check_old_index - check the old copy of the index.
* @c: UBIFS file-system description object
* @zroot: root of the new index
*
* In order to be able to recover from an unclean unmount, a complete copy of
* the index must exist on flash. This is the "old" index. The commit process
* must write the "new" index to flash without overwriting or destroying any
* part of the old index. This function is run at commit end in order to check
* that the old index does indeed exist completely intact.
*
* The check is skipped (and %0 returned) unless index checks are enabled, see
* 'dbg_is_chk_index()'. On success the information for the next check is
* refreshed from @zroot via 'dbg_old_index_check_init()'.
*
* A failed consistency check is reported with a small positive code in the
* "failed, error %d" message and then converted to %-EINVAL:
* 1 - child count is less than 1 or greater than the fanout
* 2 - root level differs from the recorded old root level
* 3 - root sqnum differs from the recorded old root sqnum, or a node's level
* is not exactly one less than its parent's level (the code is shared by
* both checks)
* 4 - a node's sqnum is not less than its parent's sqnum
* 5 - a node's first key is less than the expected lower key
* 6 - a node's last key is greater than the expected upper key
* 7 - a node's last key equals the upper key but is not a hash key
*
* This function returns %0 on success and a negative error code on failure.
*/
int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
{
int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
int first = 1, iip;
struct ubifs_debug_info *d = c->dbg;
union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key;
unsigned long long uninitialized_var(last_sqnum);
struct ubifs_idx_node *idx;
struct list_head list;
struct idx_node *i;
size_t sz;
if (!dbg_is_chk_index(c))
return 0;
INIT_LIST_HEAD(&list);
/* Room for 'struct idx_node' plus the branches of a full-fanout index node */
sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) -
UBIFS_IDX_NODE_SZ;
/* Start at the old zroot */
lnum = d->old_zroot.lnum;
offs = d->old_zroot.offs;
len = d->old_zroot.len;
iip = 0;
/*
* Traverse the index tree preorder depth-first i.e. do a node and then
* its subtrees from left to right.
*/
while (1) {
struct ubifs_branch *br;
/* Get the next index node */
i = kmalloc(sz, GFP_NOFS);
if (!i) {
err = -ENOMEM;
goto out_free;
}
i->iip = iip;
/* Keep the index nodes on our path in a linked list */
list_add_tail(&i->list, &list);
/* Read the index node */
idx = &i->idx;
err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
if (err)
goto out_free;
/* Validate index node */
child_cnt = le16_to_cpu(idx->child_cnt);
if (child_cnt < 1 || child_cnt > c->fanout) {
err = 1;
goto out_dump;
}
if (first) {
first = 0;
/* Check root level and sqnum */
if (le16_to_cpu(idx->level) != d->old_zroot_level) {
err = 2;
goto out_dump;
}
if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) {
err = 3;
goto out_dump;
}
/* Set last values as though root had a parent */
last_level = le16_to_cpu(idx->level) + 1;
last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1;
key_read(c, ubifs_idx_key(c, idx), &lower_key);
highest_ino_key(c, &upper_key, INUM_WATERMARK);
}
/* Remember this node's upper bound for when we come back up to it */
key_copy(c, &upper_key, &i->upper_key);
/* NOTE: code 3 is also used by the root sqnum check above */
if (le16_to_cpu(idx->level) != last_level - 1) {
err = 3;
goto out_dump;
}
/*
* The index is always written bottom up hence a child's sqnum
* is always less than the parents.
*/
if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) {
err = 4;
goto out_dump;
}
/* Check key range */
key_read(c, ubifs_idx_key(c, idx), &l_key);
br = ubifs_idx_branch(c, idx, child_cnt - 1);
key_read(c, &br->key, &u_key);
if (keys_cmp(c, &lower_key, &l_key) > 0) {
err = 5;
goto out_dump;
}
if (keys_cmp(c, &upper_key, &u_key) < 0) {
err = 6;
goto out_dump;
}
/* The last key may equal the upper bound only if it is a hash key */
if (keys_cmp(c, &upper_key, &u_key) == 0)
if (!is_hash_key(c, &u_key)) {
err = 7;
goto out_dump;
}
/* Go to next index node */
if (le16_to_cpu(idx->level) == 0) {
/* At the bottom, so go up until can go right */
while (1) {
/* Drop the bottom of the list */
list_del(&i->list);
kfree(i);
/* No more list means we are done */
if (list_empty(&list))
goto out;
/* Look at the new bottom */
i = list_entry(list.prev, struct idx_node,
list);
idx = &i->idx;
/* Can we go right */
if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
iip = iip + 1;
break;
} else
/* Nope, so go up again */
iip = i->iip;
}
} else
/* Go down left */
iip = 0;
/*
* We have the parent in 'idx' and now we set up for reading the
* child pointed to by slot 'iip'.
*/
last_level = le16_to_cpu(idx->level);
last_sqnum = le64_to_cpu(idx->ch.sqnum);
br = ubifs_idx_branch(c, idx, iip);
lnum = le32_to_cpu(br->lnum);
offs = le32_to_cpu(br->offs);
len = le32_to_cpu(br->len);
key_read(c, &br->key, &lower_key);
/*
* The child's upper bound is the key of the next branch or, for the
* last branch, the upper bound of the parent itself.
*/
if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
br = ubifs_idx_branch(c, idx, iip + 1);
key_read(c, &br->key, &upper_key);
} else
key_copy(c, &i->upper_key, &upper_key);
}
out:
/* The whole old index checked out; record the new root for the next check */
err = dbg_old_index_check_init(c, zroot);
if (err)
goto out_free;
return 0;
out_dump:
/* Dump the offending node and, if there is one, its parent */
ubifs_err("dumping index node (iip=%d)", i->iip);
ubifs_dump_node(c, idx);
list_del(&i->list);
kfree(i);
if (!list_empty(&list)) {
i = list_entry(list.prev, struct idx_node, list);
ubifs_err("dumping parent index node");
ubifs_dump_node(c, &i->idx);
}
out_free:
/* Free whatever is left of the path list */
while (!list_empty(&list)) {
i = list_entry(list.next, struct idx_node, list);
list_del(&i->list);
kfree(i);
}
ubifs_err("failed, error %d", err);
/* Positive codes only identify the failed check in the message above */
if (err > 0)
err = -EINVAL;
return err;
}

250
fs/ubifs/compress.c Normal file
View file

@ -0,0 +1,250 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
* Copyright (C) 2006, 2007 University of Szeged, Hungary
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Adrian Hunter
* Artem Bityutskiy (Битюцкий Артём)
* Zoltan Sogor
*/
/*
* This file provides a single place to access compression and
* decompression.
*/
#include <linux/crypto.h>
#include "ubifs.h"
/*
* Fake description object for the "none" compressor. @capi_name is an empty
* string rather than NULL, so the "not compiled in" test in
* 'ubifs_decompress()' (which looks for a NULL @capi_name) does not reject it.
* It is registered directly by 'ubifs_compressors_init()' and does not go
* through 'compr_init()', so no crypto transform is allocated for it.
*/
static struct ubifs_compressor none_compr = {
.compr_type = UBIFS_COMPR_NONE,
.name = "none",
.capi_name = "",
};
/*
* LZO compressor description. When LZO support is not configured, @capi_name
* is left unset (NULL), which 'compr_init()', 'compr_exit()' and
* 'ubifs_decompress()' treat as "not compiled in". Only @comp_mutex is set
* for LZO; there is no @decomp_mutex.
*/
#ifdef CONFIG_UBIFS_FS_LZO
static DEFINE_MUTEX(lzo_mutex);
static struct ubifs_compressor lzo_compr = {
.compr_type = UBIFS_COMPR_LZO,
.comp_mutex = &lzo_mutex,
.name = "lzo",
.capi_name = "lzo",
};
#else
static struct ubifs_compressor lzo_compr = {
.compr_type = UBIFS_COMPR_LZO,
.name = "lzo",
};
#endif
/*
* zlib compressor description; the crypto API algorithm name is "deflate".
* Compression and decompression each have their own mutex. When zlib support
* is not configured, @capi_name is left unset (NULL), which 'compr_init()',
* 'compr_exit()' and 'ubifs_decompress()' treat as "not compiled in".
*/
#ifdef CONFIG_UBIFS_FS_ZLIB
static DEFINE_MUTEX(deflate_mutex);
static DEFINE_MUTEX(inflate_mutex);
static struct ubifs_compressor zlib_compr = {
.compr_type = UBIFS_COMPR_ZLIB,
.comp_mutex = &deflate_mutex,
.decomp_mutex = &inflate_mutex,
.name = "zlib",
.capi_name = "deflate",
};
#else
static struct ubifs_compressor zlib_compr = {
.compr_type = UBIFS_COMPR_ZLIB,
.name = "zlib",
};
#endif
/*
* All UBIFS compressors, indexed by compression type. The slots are filled in
* by 'compr_init()' and 'ubifs_compressors_init()'.
*/
struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
/**
 * ubifs_compress - compress data.
 * @in_buf: data to compress
 * @in_len: length of the data to compress
 * @out_buf: output buffer where compressed data should be stored
 * @out_len: output buffer length is returned here
 * @compr_type: type of compression to use on enter, actually used compression
 *              type on exit
 *
 * This function compresses input buffer @in_buf of length @in_len and stores
 * the result in the output buffer @out_buf and the resulting length in
 * @out_len. If the input buffer does not compress, it is just copied to the
 * @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE, if the
 * requested compressor is not compiled in, or if compression error occurred.
 *
 * Note, if the input buffer was not compressed, it is copied to the output
 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
 */
void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
		    int *compr_type)
{
	int err;
	struct ubifs_compressor *compr = ubifs_compressors[*compr_type];

	if (*compr_type == UBIFS_COMPR_NONE)
		goto no_compr;

	/* If the input data is small, do not even try to compress it */
	if (in_len < UBIFS_MIN_COMPR_LEN)
		goto no_compr;

	/*
	 * A compressor which is not compiled in has no crypto API name and
	 * 'compr_init()' never allocated a transform for it, so 'compr->cc'
	 * must not be handed to the crypto layer. Store the data uncompressed
	 * instead - this is the same test 'ubifs_decompress()' makes.
	 */
	if (unlikely(!compr->capi_name)) {
		ubifs_warn("%s compression is not compiled in, leave data uncompressed",
			   compr->name);
		goto no_compr;
	}

	if (compr->comp_mutex)
		mutex_lock(compr->comp_mutex);
	err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf,
				   (unsigned int *)out_len);
	if (compr->comp_mutex)
		mutex_unlock(compr->comp_mutex);
	if (unlikely(err)) {
		ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed",
			   in_len, compr->name, err);
		goto no_compr;
	}

	/*
	 * If the data compressed only slightly, it is better to leave it
	 * uncompressed to improve read speed.
	 */
	if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF)
		goto no_compr;

	return;

no_compr:
	memcpy(out_buf, in_buf, in_len);
	*out_len = in_len;
	*compr_type = UBIFS_COMPR_NONE;
}
/**
* ubifs_decompress - decompress data.
* @in_buf: data to decompress
* @in_len: length of the data to decompress
* @out_buf: output buffer where decompressed data should
* @out_len: output length is returned here
* @compr_type: type of compression
*
* This function decompresses data from buffer @in_buf into buffer @out_buf.
* The length of the uncompressed data is returned in @out_len. This functions
* returns %0 on success or a negative error code on failure.
*/
int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
int *out_len, int compr_type)
{
int err;
struct ubifs_compressor *compr;
if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
ubifs_err("invalid compression type %d", compr_type);
return -EINVAL;
}
compr = ubifs_compressors[compr_type];
if (unlikely(!compr->capi_name)) {
ubifs_err("%s compression is not compiled in", compr->name);
return -EINVAL;
}
if (compr_type == UBIFS_COMPR_NONE) {
memcpy(out_buf, in_buf, in_len);
*out_len = in_len;
return 0;
}
if (compr->decomp_mutex)
mutex_lock(compr->decomp_mutex);
err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf,
(unsigned int *)out_len);
if (compr->decomp_mutex)
mutex_unlock(compr->decomp_mutex);
if (err)
ubifs_err("cannot decompress %d bytes, compressor %s, error %d",
in_len, compr->name, err);
return err;
}
/**
 * compr_init - initialize a compressor.
 * @compr: compressor description object
 *
 * Allocates the crypto transform for @compr when it has a crypto API name and
 * registers @compr in the 'ubifs_compressors' table. A compressor without a
 * crypto API name is registered without a transform. Returns zero in case of
 * success or a negative error code in case of failure.
 */
static int __init compr_init(struct ubifs_compressor *compr)
{
	const char *alg = compr->capi_name;

	if (alg != NULL) {
		compr->cc = crypto_alloc_comp(alg, 0, 0);
		if (IS_ERR(compr->cc)) {
			long code = PTR_ERR(compr->cc);

			ubifs_err("cannot initialize compressor %s, error %ld",
				  compr->name, code);
			return code;
		}
	}

	ubifs_compressors[compr->compr_type] = compr;
	return 0;
}
/**
 * compr_exit - de-initialize a compressor.
 * @compr: compressor description object
 *
 * Frees the crypto transform of @compr. Compressors without a crypto API name
 * have no transform and are left alone.
 */
static void compr_exit(struct ubifs_compressor *compr)
{
	if (!compr->capi_name)
		return;

	crypto_free_comp(compr->cc);
}
/**
 * ubifs_compressors_init - initialize UBIFS compressors.
 *
 * Initializes the LZO and zlib compressors and registers the fake "none"
 * compressor. If zlib initialization fails, LZO is de-initialized again.
 * Returns zero in case of success and a negative error code in case of
 * failure.
 */
int __init ubifs_compressors_init(void)
{
	int err = compr_init(&lzo_compr);

	if (err)
		return err;

	err = compr_init(&zlib_compr);
	if (err) {
		/* Undo the LZO initialization done above */
		compr_exit(&lzo_compr);
		return err;
	}

	ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr;
	return 0;
}
/**
* ubifs_compressors_exit - de-initialize UBIFS compressors.
*
* De-initializes the LZO and zlib compressors via 'compr_exit()'. The "none"
* compressor is not passed to 'compr_exit()'.
*/
void ubifs_compressors_exit(void)
{
compr_exit(&lzo_compr);
compr_exit(&zlib_compr);
}

3100
fs/ubifs/debug.c Normal file

File diff suppressed because it is too large Load diff

315
fs/ubifs/debug.h Normal file
View file

@ -0,0 +1,315 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
#ifndef __UBIFS_DEBUG_H__
#define __UBIFS_DEBUG_H__
/*
* Checking helper functions: callback types taken by 'dbg_walk_index()'. One
* receives a branch ('struct ubifs_zbranch'), the other a znode; both get the
* caller's private pointer @priv and return an int.
*/
typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
struct ubifs_zbranch *zbr, void *priv);
typedef int (*dbg_znode_callback)(struct ubifs_info *c,
struct ubifs_znode *znode, void *priv);
/*
* The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi"
* + 1 for "_" + 2x2 for 2 UBI numbers + 1 for the trailing zero byte).
*/
#define UBIFS_DFS_DIR_NAME "ubi%d_%d"
#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1)
/**
* struct ubifs_debug_info - per-FS debugging information.
* @old_zroot: old index root - used by 'dbg_check_old_index()'
* @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
* @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
*
* @pc_happened: non-zero if an emulated power cut happened
* @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
* @pc_timeout: time in jiffies when delay of failure mode expires
* @pc_cnt: current number of calls to failure mode I/O functions
* @pc_cnt_max: number of calls by which to delay failure mode
*
* @chk_lpt_sz: used by LPT tree size checker
* @chk_lpt_sz2: used by LPT tree size checker
* @chk_lpt_wastage: used by LPT tree size checker
* @chk_lpt_lebs: used by LPT tree size checker
* @new_nhead_offs: used by LPT tree size checker
* @new_ihead_lnum: used by debugging to check @c->ihead_lnum
* @new_ihead_offs: used by debugging to check @c->ihead_offs
*
* @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
* @saved_bi: saved budgeting information
* @saved_free: saved amount of free space
* @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
*
* @chk_gen: if general extra checks are enabled
* @chk_index: if index extra checks are enabled
* @chk_orph: if orphans extra checks are enabled
* @chk_lprops: if lprops extra checks are enabled
* @chk_fs: if UBIFS contents extra checks are enabled
* @tst_rcvry: if UBIFS recovery testing mode enabled
*
* @dfs_dir_name: name of debugfs directory containing this file-system's files
* @dfs_dir: direntry object of the file-system debugfs directory
* @dfs_dump_lprops: "dump lprops" debugfs knob
* @dfs_dump_budg: "dump budgeting information" debugfs knob
* @dfs_dump_tnc: "dump TNC" debugfs knob
* @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks
* @dfs_chk_index: debugfs knob to enable UBIFS index extra checks
* @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks
* @dfs_chk_lprops: debugfs knob to enable UBIFS LEB properties extra checks
* @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks
* @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing
* @dfs_ro_error: debugfs knob to switch UBIFS to R/O mode (different to
* re-mounting to R/O mode because it does not flush any buffers
* and UBIFS just starts returning -EROFS on all write
* operations)
*/
struct ubifs_debug_info {
struct ubifs_zbranch old_zroot;
int old_zroot_level;
unsigned long long old_zroot_sqnum;
int pc_happened;
int pc_delay;
unsigned long pc_timeout;
unsigned int pc_cnt;
unsigned int pc_cnt_max;
long long chk_lpt_sz;
long long chk_lpt_sz2;
long long chk_lpt_wastage;
int chk_lpt_lebs;
int new_nhead_offs;
int new_ihead_lnum;
int new_ihead_offs;
struct ubifs_lp_stats saved_lst;
struct ubifs_budg_info saved_bi;
long long saved_free;
int saved_idx_gc_cnt;
unsigned int chk_gen:1;
unsigned int chk_index:1;
unsigned int chk_orph:1;
unsigned int chk_lprops:1;
unsigned int chk_fs:1;
unsigned int tst_rcvry:1;
char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1];
struct dentry *dfs_dir;
struct dentry *dfs_dump_lprops;
struct dentry *dfs_dump_budg;
struct dentry *dfs_dump_tnc;
struct dentry *dfs_chk_gen;
struct dentry *dfs_chk_index;
struct dentry *dfs_chk_orph;
struct dentry *dfs_chk_lprops;
struct dentry *dfs_chk_fs;
struct dentry *dfs_tst_rcvry;
struct dentry *dfs_ro_error;
};
/**
* struct ubifs_global_debug_info - global (not per-FS) UBIFS debugging
* information.
*
* Each flag is OR-ed with the per-FS flag of the same name by the
* corresponding 'dbg_is_*()' helper.
*
* @chk_gen: if general extra checks are enabled
* @chk_index: if index extra checks are enabled
* @chk_orph: if orphans extra checks are enabled
* @chk_lprops: if lprops extra checks are enabled
* @chk_fs: if UBIFS contents extra checks are enabled
* @tst_rcvry: if UBIFS recovery testing mode enabled
*/
struct ubifs_global_debug_info {
unsigned int chk_gen:1;
unsigned int chk_index:1;
unsigned int chk_orph:1;
unsigned int chk_lprops:1;
unsigned int chk_fs:1;
unsigned int tst_rcvry:1;
};
/*
* ubifs_assert - report a failed assertion.
* If @expr is false, the function name, line number and current PID are
* printed at critical level and the stack is dumped. The macro itself does
* nothing else, so execution continues after a failed assertion.
*/
#define ubifs_assert(expr) do { \
if (unlikely(!(expr))) { \
pr_crit("UBIFS assert failed in %s at %u (pid %d)\n", \
__func__, __LINE__, current->pid); \
dump_stack(); \
} \
} while (0)
/*
* ubifs_assert_cmt_locked - complain if the commit semaphore is not held.
* Tries to take @c->commit_sem for writing. If that succeeds, nobody was
* holding it, so it is released again and the failure is reported through
* 'ubifs_assert()'. NOTE(review): this presumably cannot tell whether it is
* the caller or somebody else who holds the semaphore - confirm.
*/
#define ubifs_assert_cmt_locked(c) do { \
if (unlikely(down_write_trylock(&(c)->commit_sem))) { \
up_write(&(c)->commit_sem); \
pr_crit("commit lock is not locked!\n"); \
ubifs_assert(0); \
} \
} while (0)
/*
* ubifs_dbg_msg - print a debugging message via 'pr_debug()', prefixed with
* the message type @type and the current PID. @type and @fmt must be string
* literals because they are concatenated into the format string.
*/
#define ubifs_dbg_msg(type, fmt, ...) \
pr_debug("UBIFS DBG " type " (pid %d): " fmt "\n", current->pid, \
##__VA_ARGS__)
/* Size of the on-stack buffer 'ubifs_dbg_msg_key()' formats the key into */
#define DBG_KEY_BUF_LEN 48
/*
* ubifs_dbg_msg_key - same as 'ubifs_dbg_msg()', but appends the printable
* form of @key to the message. Note, this macro uses a variable named 'c' from
* the scope where it is expanded (it is passed to 'dbg_snprintf_key()').
*/
#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \
char __tmp_key_buf[DBG_KEY_BUF_LEN]; \
pr_debug("UBIFS DBG " type " (pid %d): " fmt "%s\n", current->pid, \
##__VA_ARGS__, \
dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \
} while (0)
/*
* Per-subsystem debugging message macros. Each one is 'ubifs_dbg_msg()' with a
* fixed message type; the variants ending with 'k' take a key as the first
* argument and go through 'ubifs_dbg_msg_key()'.
*/
/* General messages */
#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
/* Additional journal messages */
#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
#define dbg_jnlk(key, fmt, ...) \
ubifs_dbg_msg_key("jnl", key, fmt, ##__VA_ARGS__)
/* Additional TNC messages */
#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
#define dbg_tnck(key, fmt, ...) \
ubifs_dbg_msg_key("tnc", key, fmt, ##__VA_ARGS__)
/* Additional lprops messages */
#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
/* Additional LEB find messages */
#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
/* Additional mount messages */
#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
#define dbg_mntk(key, fmt, ...) \
ubifs_dbg_msg_key("mnt", key, fmt, ##__VA_ARGS__)
/* Additional I/O messages */
#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
/* Additional commit messages */
#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
/* Additional budgeting messages */
#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
/* Additional log messages */
#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
/* Additional gc messages */
#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
/* Additional scan messages */
#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
/* Additional recovery messages */
#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
extern struct ubifs_global_debug_info ubifs_dbg;
/*
 * The helpers below return %1 if the corresponding check or test mode is
 * enabled either globally ('ubifs_dbg') or for this file-system ('c->dbg'),
 * and %0 otherwise.
 */

/* General extra checks */
static inline int dbg_is_chk_gen(const struct ubifs_info *c)
{
	return ubifs_dbg.chk_gen || c->dbg->chk_gen;
}

/* Index extra checks */
static inline int dbg_is_chk_index(const struct ubifs_info *c)
{
	return ubifs_dbg.chk_index || c->dbg->chk_index;
}

/* Orphans extra checks */
static inline int dbg_is_chk_orph(const struct ubifs_info *c)
{
	return ubifs_dbg.chk_orph || c->dbg->chk_orph;
}

/* Lprops extra checks */
static inline int dbg_is_chk_lprops(const struct ubifs_info *c)
{
	return ubifs_dbg.chk_lprops || c->dbg->chk_lprops;
}

/* UBIFS contents extra checks */
static inline int dbg_is_chk_fs(const struct ubifs_info *c)
{
	return ubifs_dbg.chk_fs || c->dbg->chk_fs;
}

/* Recovery testing mode */
static inline int dbg_is_tst_rcvry(const struct ubifs_info *c)
{
	return ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry;
}

/* Returns %1 if an emulated power cut has happened on this file-system */
static inline int dbg_is_power_cut(const struct ubifs_info *c)
{
	return c->dbg->pc_happened != 0;
}
int ubifs_debugging_init(struct ubifs_info *c);
void ubifs_debugging_exit(struct ubifs_info *c);
/* Dump functions */
const char *dbg_ntype(int type);
const char *dbg_cstate(int cmt_state);
const char *dbg_jhead(int jhead);
const char *dbg_get_key_dump(const struct ubifs_info *c,
const union ubifs_key *key);
const char *dbg_snprintf_key(const struct ubifs_info *c,
const union ubifs_key *key, char *buffer, int len);
void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode);
void ubifs_dump_node(const struct ubifs_info *c, const void *node);
void ubifs_dump_budget_req(const struct ubifs_budget_req *req);
void ubifs_dump_lstats(const struct ubifs_lp_stats *lst);
void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
void ubifs_dump_lprop(const struct ubifs_info *c,
const struct ubifs_lprops *lp);
void ubifs_dump_lprops(struct ubifs_info *c);
void ubifs_dump_lpt_info(struct ubifs_info *c);
void ubifs_dump_leb(const struct ubifs_info *c, int lnum);
void ubifs_dump_sleb(const struct ubifs_info *c,
const struct ubifs_scan_leb *sleb, int offs);
void ubifs_dump_znode(const struct ubifs_info *c,
const struct ubifs_znode *znode);
void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
int cat);
void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
struct ubifs_nnode *parent, int iip);
void ubifs_dump_tnc(struct ubifs_info *c);
void ubifs_dump_index(struct ubifs_info *c);
void ubifs_dump_lpt_lebs(const struct ubifs_info *c);
int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
dbg_znode_callback znode_cb, void *priv);
/* Checking functions */
void dbg_save_space_info(struct ubifs_info *c);
int dbg_check_space_info(struct ubifs_info *c);
int dbg_check_lprops(struct ubifs_info *c);
int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int dbg_check_cats(struct ubifs_info *c);
int dbg_check_ltab(struct ubifs_info *c);
int dbg_chk_lpt_free_spc(struct ubifs_info *c);
int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len);
int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode);
int dbg_check_dir(struct ubifs_info *c, const struct inode *dir);
int dbg_check_tnc(struct ubifs_info *c, int extra);
int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
int dbg_check_filesystem(struct ubifs_info *c);
void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
int add_pos);
int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
int row, int col);
int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
loff_t size);
int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
int len);
int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len);
int dbg_leb_unmap(struct ubifs_info *c, int lnum);
int dbg_leb_map(struct ubifs_info *c, int lnum);
/* Debugfs-related stuff */
int dbg_debugfs_init(void);
void dbg_debugfs_exit(void);
int dbg_debugfs_init_fs(struct ubifs_info *c);
void dbg_debugfs_exit_fs(struct ubifs_info *c);
#endif /* !__UBIFS_DEBUG_H__ */

1185
fs/ubifs/dir.c Normal file

File diff suppressed because it is too large Load diff

1592
fs/ubifs/file.c Normal file

File diff suppressed because it is too large Load diff

985
fs/ubifs/find.c Normal file
View file

@ -0,0 +1,985 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/*
* This file contains functions for finding LEBs for various purposes e.g.
* garbage collection. In general, lprops category heaps and lists are used
* for fast access, falling back on scanning the LPT as a last resort.
*/
#include <linux/sort.h>
#include "ubifs.h"
/**
* struct scan_data - data provided to scan callback functions
* @min_space: minimum number of bytes for which to scan
* @pick_free: whether it is OK to scan for empty LEBs
* @lnum: LEB number found is returned here
* @exclude_index: whether to exclude index LEBs
*/
struct scan_data {
/* Space threshold: scan_for_dirty_cb compares it with free + dirty, scan_for_free_cb with free */
int min_space;
/* Non-zero lets a callback accept a LEB that is entirely free (or entirely free + dirty) */
int pick_free;
/* Output: a callback stores the chosen LEB number here; callers preset it to -1 */
int lnum;
/* Non-zero makes scan_for_dirty_cb skip LEBs flagged LPROPS_INDEX */
int exclude_index;
};
/**
* valuable - determine whether LEB properties are valuable.
* @c: the UBIFS file-system description object
* @lprops: LEB properties
*
* This function returns %1 if the LEB properties should be added to the LEB
* properties tree in memory. Otherwise %0 is returned.
*/
static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops)
{
int n, cat = lprops->flags & LPROPS_CAT_MASK;
struct ubifs_lpt_heap *heap;
switch (cat) {
case LPROPS_DIRTY:
case LPROPS_DIRTY_IDX:
case LPROPS_FREE:
heap = &c->lpt_heap[cat - 1];
if (heap->cnt < heap->max_cnt)
return 1;
if (lprops->free + lprops->dirty >= c->dark_wm)
return 1;
return 0;
case LPROPS_EMPTY:
n = c->lst.empty_lebs + c->freeable_cnt -
c->lst.taken_empty_lebs;
if (n < c->lsave_cnt)
return 1;
return 0;
case LPROPS_FREEABLE:
return 1;
case LPROPS_FRDI_IDX:
return 1;
}
return 0;
}
/**
* scan_for_dirty_cb - dirty space scan callback.
* @c: the UBIFS file-system description object
* @lprops: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
* @data: information passed to and from the caller of the scan
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
* in main memory (%LPT_SCAN_ADD), or whether the scan should stop
* (%LPT_SCAN_STOP).
*/
static int scan_for_dirty_cb(struct ubifs_info *c,
			     const struct ubifs_lprops *lprops, int in_tree,
			     struct scan_data *data)
{
	const int reclaimable = lprops->free + lprops->dirty;
	int verdict;

	/* A LEB that is currently taken is neither cached nor picked */
	if (lprops->flags & LPROPS_TAKEN)
		return LPT_SCAN_CONTINUE;

	/* Ask for the lprops to be cached if they are not in memory yet */
	verdict = LPT_SCAN_CONTINUE;
	if (!in_tree && valuable(c, lprops))
		verdict |= LPT_SCAN_ADD;

	/* Not enough free + dirty space for the caller */
	if (reclaimable < data->min_space)
		return verdict;

	/* Index LEBs are skipped when the caller asked for that */
	if (data->exclude_index && (lprops->flags & LPROPS_INDEX))
		return verdict;

	/*
	 * A LEB consisting only of free and dirty space is acceptable only
	 * with @pick_free; any other LEB needs at least a dead watermark of
	 * dirty space.
	 */
	if (reclaimable == c->leb_size ? !data->pick_free :
					 lprops->dirty < c->dead_wm)
		return verdict;

	/* This LEB qualifies: report it and stop the scan */
	data->lnum = lprops->lnum;
	return LPT_SCAN_ADD | LPT_SCAN_STOP;
}
/**
* scan_for_dirty - find a LEB with enough free plus dirty space.
* @c: the UBIFS file-system description object
* @min_space: minimum amount free plus dirty space the returned LEB has to
* have
* @pick_free: if it is OK to return a free or freeable LEB
* @exclude_index: whether to exclude index LEBs
*
* This function returns a pointer to the LEB properties found or a negative
* error code.
*/
static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
int min_space, int pick_free,
int exclude_index)
{
const struct ubifs_lprops *lprops;
struct ubifs_lpt_heap *heap;
struct scan_data data;
int err, i;
/* There may be an LEB with enough dirty space on the free heap */
heap = &c->lpt_heap[LPROPS_FREE - 1];
for (i = 0; i < heap->cnt; i++) {
lprops = heap->arr[i];
if (lprops->free + lprops->dirty < min_space)
continue;
if (lprops->dirty < c->dead_wm)
continue;
return lprops;
}
/*
* A LEB may have fallen off of the bottom of the dirty heap, and ended
* up as uncategorized even though it has enough dirty space for us now,
* so check the uncategorized list. N.B. neither empty nor freeable LEBs
* can end up as uncategorized because they are kept on lists not
* finite-sized heaps.
*/
list_for_each_entry(lprops, &c->uncat_list, list) {
if (lprops->flags & LPROPS_TAKEN)
continue;
if (lprops->free + lprops->dirty < min_space)
continue;
if (exclude_index && (lprops->flags & LPROPS_INDEX))
continue;
if (lprops->dirty < c->dead_wm)
continue;
return lprops;
}
/* We have looked everywhere in main memory, now scan the flash */
if (c->pnodes_have >= c->pnode_cnt)
/* All pnodes are in memory, so skip scan */
return ERR_PTR(-ENOSPC);
data.min_space = min_space;
data.pick_free = pick_free;
data.lnum = -1;
data.exclude_index = exclude_index;
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
(ubifs_lpt_scan_callback)scan_for_dirty_cb,
&data);
if (err)
return ERR_PTR(err);
ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
c->lscan_lnum = data.lnum;
lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
if (IS_ERR(lprops))
return lprops;
ubifs_assert(lprops->lnum == data.lnum);
ubifs_assert(lprops->free + lprops->dirty >= min_space);
ubifs_assert(lprops->dirty >= c->dead_wm ||
(pick_free &&
lprops->free + lprops->dirty == c->leb_size));
ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
ubifs_assert(!exclude_index || !(lprops->flags & LPROPS_INDEX));
return lprops;
}
/**
* ubifs_find_dirty_leb - find a dirty LEB for the Garbage Collector.
* @c: the UBIFS file-system description object
* @ret_lp: LEB properties are returned here on exit
* @min_space: minimum amount free plus dirty space the returned LEB has to
* have
* @pick_free: controls whether it is OK to pick empty or index LEBs
*
* This function tries to find a dirty logical eraseblock which has at least
* @min_space free and dirty space. It prefers to take an LEB from the dirty or
* dirty index heap, and it falls-back to LPT scanning if the heaps are empty
* or do not have an LEB which satisfies the @min_space criteria.
*
* Note, LEBs which have less than dead watermark of free + dirty space are
* never picked by this function.
*
* The additional @pick_free argument controls if this function has to return a
* free or freeable LEB if one is present. For example, GC must set it to %1,
* when called from the journal space reservation function, because the
* appearance of free space may coincide with the loss of enough dirty space
* for GC to succeed anyway.
*
* In contrast, if the Garbage Collector is called from budgeting, it should
* just make free space, not return LEBs which are already free or freeable.
*
* In addition @pick_free is set to %2 by the recovery process in order to
* recover gc_lnum in which case an index LEB must not be returned.
*
* This function returns zero and the LEB properties of found dirty LEB in case
* of success, %-ENOSPC if no dirty LEB was found and a negative error code in
* case of other failures. The returned LEB is marked as "taken".
*/
int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
int min_space, int pick_free)
{
/* pick_free == 2 means index LEBs are excluded from the very start */
int err = 0, sum, exclude_index = pick_free == 2 ? 1 : 0;
const struct ubifs_lprops *lp = NULL, *idx_lp = NULL;
struct ubifs_lpt_heap *heap, *idx_heap;
/* Paired with ubifs_release_lprops() at the 'out' label */
ubifs_get_lprops(c);
if (pick_free) {
int lebs, rsvd_idx_lebs = 0;
/* The counters below are read under c->space_lock */
spin_lock(&c->space_lock);
/* empty_lebs + idx_gc_cnt + freeable_cnt, less the empty LEBs already taken */
lebs = c->lst.empty_lebs + c->idx_gc_cnt;
lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
/*
* Note, the index may consume more LEBs than have been reserved
* for it. It is OK because it might be consolidated by GC.
* But if the index takes fewer LEBs than it is reserved for it,
* this function must avoid picking those reserved LEBs.
*/
if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
exclude_index = 1;
}
spin_unlock(&c->space_lock);
/* Check if there are enough free LEBs for the index */
if (rsvd_idx_lebs < lebs) {
/* OK, try to find an empty LEB */
lp = ubifs_fast_find_empty(c);
if (lp)
goto found;
/* Or a freeable LEB */
lp = ubifs_fast_find_freeable(c);
if (lp)
goto found;
} else
/*
* We cannot pick free/freeable LEBs in the below code.
*/
pick_free = 0;
} else {
/* Without @pick_free only the index-exclusion decision is needed */
spin_lock(&c->space_lock);
exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
spin_unlock(&c->space_lock);
}
/* Look on the dirty and dirty index heaps */
heap = &c->lpt_heap[LPROPS_DIRTY - 1];
idx_heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
/* Only the first entry (arr[0]) of each heap is considered */
if (idx_heap->cnt && !exclude_index) {
idx_lp = idx_heap->arr[0];
sum = idx_lp->free + idx_lp->dirty;
/*
* Since we reserve thrice as much space for the index than it
* actually takes, it does not make sense to pick indexing LEBs
* with less than, say, half LEB of dirty space. May be half is
* not the optimal boundary - this should be tested and
* checked. This boundary should determine how much we use
* in-the-gaps to consolidate the index comparing to how much
* we use garbage collector to consolidate it. The "half"
* criteria just feels to be fine.
*/
if (sum < min_space || sum < c->half_leb_size)
idx_lp = NULL;
}
if (heap->cnt) {
lp = heap->arr[0];
if (lp->dirty + lp->free < min_space)
lp = NULL;
}
/* Pick the LEB with most space */
/* On a tie the index LEB wins (>= comparison) */
if (idx_lp && lp) {
if (idx_lp->free + idx_lp->dirty >= lp->free + lp->dirty)
lp = idx_lp;
} else if (idx_lp && !lp)
lp = idx_lp;
if (lp) {
ubifs_assert(lp->free + lp->dirty >= c->dead_wm);
goto found;
}
/* Did not find a dirty LEB on the dirty heaps, have to scan */
dbg_find("scanning LPT for a dirty LEB");
/* pick_free may have been cleared above, so the scan honours that decision */
lp = scan_for_dirty(c, min_space, pick_free, exclude_index);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
ubifs_assert(lp->dirty >= c->dead_wm ||
(pick_free && lp->free + lp->dirty == c->leb_size));
found:
/* Every successful path arrives here with lp set to the chosen LEB */
dbg_find("found LEB %d, free %d, dirty %d, flags %#x",
lp->lnum, lp->free, lp->dirty, lp->flags);
/* Set LPROPS_TAKEN; free and dirty are passed as LPROPS_NC */
lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
lp->flags | LPROPS_TAKEN, 0);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
/* The caller receives a copy of the properties, not the lprops pointer */
memcpy(ret_lp, lp, sizeof(struct ubifs_lprops));
out:
ubifs_release_lprops(c);
return err;
}
/**
* scan_for_free_cb - free space scan callback.
* @c: the UBIFS file-system description object
* @lprops: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
* @data: information passed to and from the caller of the scan
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
* in main memory (%LPT_SCAN_ADD), or whether the scan should stop
* (%LPT_SCAN_STOP).
*/
static int scan_for_free_cb(struct ubifs_info *c,
			    const struct ubifs_lprops *lprops, int in_tree,
			    struct scan_data *data)
{
	int verdict = LPT_SCAN_CONTINUE;

	/* A LEB that is currently taken is neither cached nor picked */
	if (lprops->flags & LPROPS_TAKEN)
		return LPT_SCAN_CONTINUE;

	/* Ask for the lprops to be cached if they are not in memory yet */
	if (!in_tree && valuable(c, lprops))
		verdict |= LPT_SCAN_ADD;

	/* Index LEBs are never handed out by this scan */
	if (lprops->flags & LPROPS_INDEX)
		return verdict;

	/* Too little free space for the caller */
	if (lprops->free < data->min_space)
		return verdict;

	/* Completely free LEBs are only allowed with @pick_free */
	if (lprops->free == c->leb_size && !data->pick_free)
		return verdict;

	/*
	 * A LEB made up solely of free and dirty space must not be allocated:
	 * it may already have been unmapped, or its data may be obsolete only
	 * because of nodes that are still sitting in a wbuf.
	 */
	if (lprops->dirty > 0 &&
	    lprops->free + lprops->dirty == c->leb_size)
		return verdict;

	/* This LEB qualifies: report it and stop the scan */
	data->lnum = lprops->lnum;
	return LPT_SCAN_ADD | LPT_SCAN_STOP;
}
/**
* do_find_free_space - find a data LEB with free space.
* @c: the UBIFS file-system description object
* @min_space: minimum amount of free space required
* @pick_free: whether it is OK to scan for empty LEBs
* @squeeze: whether to try to find space in a non-empty LEB first
*
* This function returns a pointer to the LEB properties found or a negative
* error code.
*/
static
const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
int min_space, int pick_free,
int squeeze)
{
const struct ubifs_lprops *lprops;
struct ubifs_lpt_heap *heap;
struct scan_data data;
int err, i;
if (squeeze) {
lprops = ubifs_fast_find_free(c);
if (lprops && lprops->free >= min_space)
return lprops;
}
if (pick_free) {
lprops = ubifs_fast_find_empty(c);
if (lprops)
return lprops;
}
if (!squeeze) {
lprops = ubifs_fast_find_free(c);
if (lprops && lprops->free >= min_space)
return lprops;
}
/* There may be an LEB with enough free space on the dirty heap */
heap = &c->lpt_heap[LPROPS_DIRTY - 1];
for (i = 0; i < heap->cnt; i++) {
lprops = heap->arr[i];
if (lprops->free >= min_space)
return lprops;
}
/*
* A LEB may have fallen off of the bottom of the free heap, and ended
* up as uncategorized even though it has enough free space for us now,
* so check the uncategorized list. N.B. neither empty nor freeable LEBs
* can end up as uncategorized because they are kept on lists not
* finite-sized heaps.
*/
list_for_each_entry(lprops, &c->uncat_list, list) {
if (lprops->flags & LPROPS_TAKEN)
continue;
if (lprops->flags & LPROPS_INDEX)
continue;
if (lprops->free >= min_space)
return lprops;
}
/* We have looked everywhere in main memory, now scan the flash */
if (c->pnodes_have >= c->pnode_cnt)
/* All pnodes are in memory, so skip scan */
return ERR_PTR(-ENOSPC);
data.min_space = min_space;
data.pick_free = pick_free;
data.lnum = -1;
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
(ubifs_lpt_scan_callback)scan_for_free_cb,
&data);
if (err)
return ERR_PTR(err);
ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
c->lscan_lnum = data.lnum;
lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
if (IS_ERR(lprops))
return lprops;
ubifs_assert(lprops->lnum == data.lnum);
ubifs_assert(lprops->free >= min_space);
ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
ubifs_assert(!(lprops->flags & LPROPS_INDEX));
return lprops;
}
/**
* ubifs_find_free_space - find a data LEB with free space.
* @c: the UBIFS file-system description object
* @min_space: minimum amount of required free space
* @offs: contains offset of where free space starts on exit
* @squeeze: whether to try to find space in a non-empty LEB first
*
* This function looks for an LEB with at least @min_space bytes of free space.
* It tries to find an empty LEB if possible. If no empty LEBs are available,
* this function searches for a non-empty data LEB. The returned LEB is marked
* as "taken".
*
* This function returns found LEB number in case of success, %-ENOSPC if it
* failed to find a LEB with @min_space bytes of free space and other negative
* error codes in case of failure.
*/
int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
int squeeze)
{
const struct ubifs_lprops *lprops;
int lebs, rsvd_idx_lebs, pick_free = 0, err, lnum, flags;
dbg_find("min_space %d", min_space);
/* Paired with ubifs_release_lprops() on both the success and 'out' paths */
ubifs_get_lprops(c);
/* Check if there are enough empty LEBs for commit */
spin_lock(&c->space_lock);
/* rsvd_idx_lebs: shortfall of idx_lebs below bi.min_idx_lebs, or zero */
if (c->bi.min_idx_lebs > c->lst.idx_lebs)
rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
else
rsvd_idx_lebs = 0;
lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
c->lst.taken_empty_lebs;
/* Note: the inner 'if' below is the whole body of this unbraced outer 'if' */
if (rsvd_idx_lebs < lebs)
/*
* OK to allocate an empty LEB, but we still don't want to go
* looking for one if there aren't any.
*/
if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
pick_free = 1;
/*
* Because we release the space lock, we must account
* for this allocation here. After the LEB properties
* flags have been updated, we subtract one. Note, the
* result of this is that lprops also decreases
* @taken_empty_lebs in 'ubifs_change_lp()', so it is
* off by one for a short period of time which may
* introduce a small disturbance to budgeting
* calculations, but this is harmless because at the
* worst case this would make the budgeting subsystem
* be more pessimistic than needed.
*
* Fundamentally, this is about serialization of the
* budgeting and lprops subsystems. We could make the
* @space_lock a mutex and avoid dropping it before
* calling 'ubifs_change_lp()', but mutex is more
* heavy-weight, and we want budgeting to be as fast as
* possible.
*/
c->lst.taken_empty_lebs += 1;
}
spin_unlock(&c->space_lock);
lprops = do_find_free_space(c, min_space, pick_free, squeeze);
if (IS_ERR(lprops)) {
err = PTR_ERR(lprops);
goto out;
}
/* Remember the LEB number now: lprops is reassigned by ubifs_change_lp() */
lnum = lprops->lnum;
flags = lprops->flags | LPROPS_TAKEN;
lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, flags, 0);
if (IS_ERR(lprops)) {
err = PTR_ERR(lprops);
goto out;
}
/* Undo the provisional taken_empty_lebs increment made above */
if (pick_free) {
spin_lock(&c->space_lock);
c->lst.taken_empty_lebs -= 1;
spin_unlock(&c->space_lock);
}
/* Returned offset is the LEB size minus the LEB's free space */
*offs = c->leb_size - lprops->free;
ubifs_release_lprops(c);
if (*offs == 0) {
/*
* Ensure that empty LEBs have been unmapped. They may not have
* been, for example, because of an unclean unmount. Also
* LEBs that were freeable LEBs (free + dirty == leb_size) will
* not have been unmapped.
*/
err = ubifs_leb_unmap(c, lnum);
/*
* NOTE(review): on this failure the LEB was already flagged
* LPROPS_TAKEN above and the flag is not cleared here, unlike
* ubifs_find_free_leb_for_idx() - confirm callers cope.
*/
if (err)
return err;
}
dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs);
ubifs_assert(*offs <= c->leb_size - min_space);
return lnum;
out:
/* Error path: drop the provisional increment as well, then release lprops */
if (pick_free) {
spin_lock(&c->space_lock);
c->lst.taken_empty_lebs -= 1;
spin_unlock(&c->space_lock);
}
ubifs_release_lprops(c);
return err;
}
/**
* scan_for_idx_cb - callback used by the scan for a free LEB for the index.
* @c: the UBIFS file-system description object
* @lprops: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
* @data: information passed to and from the caller of the scan
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
* in main memory (%LPT_SCAN_ADD), or whether the scan should stop
* (%LPT_SCAN_STOP).
*/
static int scan_for_idx_cb(struct ubifs_info *c,
			   const struct ubifs_lprops *lprops, int in_tree,
			   struct scan_data *data)
{
	int verdict = LPT_SCAN_CONTINUE;

	/* A LEB that is currently taken is neither cached nor picked */
	if (lprops->flags & LPROPS_TAKEN)
		return LPT_SCAN_CONTINUE;

	/* Ask for the lprops to be cached if they are not in memory yet */
	if (!in_tree && valuable(c, lprops))
		verdict |= LPT_SCAN_ADD;

	/*
	 * Reject index LEBs and LEBs that cannot be made empty, i.e. whose
	 * free and dirty space together do not cover the whole LEB.
	 */
	if ((lprops->flags & LPROPS_INDEX) ||
	    lprops->free + lprops->dirty != c->leb_size)
		return verdict;

	/*
	 * The allocation is for the index, so a LEB holding only free and
	 * dirty space is safe to take: write buffers are sync'd at commit
	 * start.
	 */
	data->lnum = lprops->lnum;
	return LPT_SCAN_ADD | LPT_SCAN_STOP;
}
/**
* scan_for_leb_for_idx - scan for a free LEB for the index.
* @c: the UBIFS file-system description object
*/
static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
{
struct ubifs_lprops *lprops;
struct scan_data data;
int err;
data.lnum = -1;
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
(ubifs_lpt_scan_callback)scan_for_idx_cb,
&data);
if (err)
return ERR_PTR(err);
ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
c->lscan_lnum = data.lnum;
lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
if (IS_ERR(lprops))
return lprops;
ubifs_assert(lprops->lnum == data.lnum);
ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
ubifs_assert(!(lprops->flags & LPROPS_INDEX));
return lprops;
}
/**
* ubifs_find_free_leb_for_idx - find a free LEB for the index.
* @c: the UBIFS file-system description object
*
* This function looks for a free LEB and returns that LEB number. The returned
* LEB is marked as "taken", "index".
*
* Only empty LEBs are allocated. This is for two reasons. First, the commit
* calculates the number of LEBs to allocate based on the assumption that they
* will be empty. Secondly, free space at the end of an index LEB is not
* guaranteed to be empty because it may have been used by the in-the-gaps
* method prior to an unclean unmount.
*
* If no LEB is found %-ENOSPC is returned. For other failures another negative
* error code is returned.
*/
int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
{
	const struct ubifs_lprops *lp;
	int lnum = -1, rc, new_flags;

	ubifs_get_lprops(c);

	/* Cheapest sources first: an empty LEB, then a freeable one */
	lp = ubifs_fast_find_empty(c);
	if (!lp)
		lp = ubifs_fast_find_freeable(c);

	/*
	 * Fall back to scanning the LPT. The first condition means: there
	 * are uncategorized lprops, so there may be freeable LEBs among them
	 * (UBIFS does not store the information about freeable LEBs in the
	 * master node).
	 */
	if (!lp &&
	    (c->in_a_category_cnt != c->main_lebs ||
	     c->lst.empty_lebs - c->lst.taken_empty_lebs > 0)) {
		ubifs_assert(c->freeable_cnt == 0);
		lp = scan_for_leb_for_idx(c);
		if (IS_ERR(lp)) {
			rc = PTR_ERR(lp);
			goto out;
		}
	}

	if (!lp) {
		rc = -ENOSPC;
		goto out;
	}

	lnum = lp->lnum;
	dbg_find("found LEB %d, free %d, dirty %d, flags %#x",
		 lnum, lp->free, lp->dirty, lp->flags);

	/* Record the LEB as fully free, taken and belonging to the index */
	new_flags = lp->flags | LPROPS_TAKEN | LPROPS_INDEX;
	lp = ubifs_change_lp(c, lp, c->leb_size, 0, new_flags, 0);
	if (IS_ERR(lp)) {
		rc = PTR_ERR(lp);
		goto out;
	}

	ubifs_release_lprops(c);

	/*
	 * Make sure the LEB really is unmapped. An empty LEB may still be
	 * mapped, for example after an unclean unmount, and LEBs that were
	 * freeable (free + dirty == leb_size) will not have been unmapped.
	 */
	rc = ubifs_leb_unmap(c, lnum);
	if (!rc)
		return lnum;

	/* Unmap failed: clear the taken and index flags that were just set */
	ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
			    LPROPS_TAKEN | LPROPS_INDEX, 0);
	return rc;

out:
	ubifs_release_lprops(c);
	return rc;
}
/*
 * cmp_dirty_idx - sort() comparator for the array of dirty index lprops.
 * @a: points to one array slot, which holds a 'struct ubifs_lprops *'
 * @b: points to another array slot
 *
 * Returns a negative value, zero or a positive value when the LEB behind @a
 * has less, the same or more free plus dirty space than the LEB behind @b.
 *
 * The prototype deliberately matches the comparator type sort() calls
 * through. Defining it with typed pointer parameters and casting the function
 * pointer at the call site makes the indirect call go through an incompatible
 * function type, which is undefined behaviour in C and is rejected by
 * control-flow-integrity checking.
 */
static int cmp_dirty_idx(const void *a, const void *b)
{
	const struct ubifs_lprops *lpa = *(const struct ubifs_lprops * const *)a;
	const struct ubifs_lprops *lpb = *(const struct ubifs_lprops * const *)b;

	return lpa->dirty + lpa->free - lpb->dirty - lpb->free;
}
/*
 * swap_dirty_idx - sort() swap helper for the array of dirty index lprops.
 * @a: points to one array slot, which holds a 'struct ubifs_lprops *'
 * @b: points to another array slot
 * @size: element size supplied by sort(); unused, slots are pointer-sized
 *
 * Exchanges the two pointers stored in the slots.
 *
 * The prototype deliberately matches the swap type sort() calls through.
 * Defining it with typed pointer parameters and casting the function pointer
 * at the call site makes the indirect call go through an incompatible
 * function type, which is undefined behaviour in C and is rejected by
 * control-flow-integrity checking.
 */
static void swap_dirty_idx(void *a, void *b, int size)
{
	struct ubifs_lprops **slot_a = a;
	struct ubifs_lprops **slot_b = b;
	struct ubifs_lprops *tmp = *slot_a;

	*slot_a = *slot_b;
	*slot_b = tmp;
}
/**
* ubifs_save_dirty_idx_lnums - save an array of the most dirty index LEB nos.
* @c: the UBIFS file-system description object
*
* This function is called each commit to create an array of LEB numbers of
* dirty index LEBs sorted in order of dirty and free space. This is used by
* the in-the-gaps method of TNC commit.
*/
int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
{
int i;
ubifs_get_lprops(c);
/* Copy the LPROPS_DIRTY_IDX heap */
c->dirty_idx.cnt = c->lpt_heap[LPROPS_DIRTY_IDX - 1].cnt;
memcpy(c->dirty_idx.arr, c->lpt_heap[LPROPS_DIRTY_IDX - 1].arr,
sizeof(void *) * c->dirty_idx.cnt);
/* Sort it so that the dirtiest is now at the end */
sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *),
(int (*)(const void *, const void *))cmp_dirty_idx,
(void (*)(void *, void *, int))swap_dirty_idx);
dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt);
if (c->dirty_idx.cnt)
dbg_find("dirtiest index LEB is %d with dirty %d and free %d",
c->dirty_idx.arr[c->dirty_idx.cnt - 1]->lnum,
c->dirty_idx.arr[c->dirty_idx.cnt - 1]->dirty,
c->dirty_idx.arr[c->dirty_idx.cnt - 1]->free);
/* Replace the lprops pointers with LEB numbers */
for (i = 0; i < c->dirty_idx.cnt; i++)
c->dirty_idx.arr[i] = (void *)(size_t)c->dirty_idx.arr[i]->lnum;
ubifs_release_lprops(c);
return 0;
}
/**
* scan_dirty_idx_cb - callback used by the scan for a dirty index LEB.
* @c: the UBIFS file-system description object
* @lprops: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
* @data: information passed to and from the caller of the scan
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
* in main memory (%LPT_SCAN_ADD), or whether the scan should stop
* (%LPT_SCAN_STOP).
*/
static int scan_dirty_idx_cb(struct ubifs_info *c,
			     const struct ubifs_lprops *lprops, int in_tree,
			     struct scan_data *data)
{
	int verdict = LPT_SCAN_CONTINUE;

	/* A LEB that is currently taken is neither cached nor picked */
	if (lprops->flags & LPROPS_TAKEN)
		return LPT_SCAN_CONTINUE;

	/* Ask for the lprops to be cached if they are not in memory yet */
	if (!in_tree && valuable(c, lprops))
		verdict |= LPT_SCAN_ADD;

	/*
	 * Only index LEBs qualify, and only those whose free plus dirty space
	 * is at least c->min_idx_node_sz.
	 */
	if (!(lprops->flags & LPROPS_INDEX) ||
	    lprops->free + lprops->dirty < c->min_idx_node_sz)
		return verdict;

	/* This LEB qualifies: report it and stop the scan */
	data->lnum = lprops->lnum;
	return LPT_SCAN_ADD | LPT_SCAN_STOP;
}
/**
* find_dirty_idx_leb - find a dirty index LEB.
* @c: the UBIFS file-system description object
*
* This function returns LEB number upon success and a negative error code upon
* failure. In particular, -ENOSPC is returned if a dirty index LEB is not
* found.
*
* Note that this function scans the entire LPT but it is called very rarely.
*/
static int find_dirty_idx_leb(struct ubifs_info *c)
{
const struct ubifs_lprops *lprops;
struct ubifs_lpt_heap *heap;
struct scan_data data;
int err, i, ret;
/* Check all structures in memory first */
data.lnum = -1;
heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
for (i = 0; i < heap->cnt; i++) {
lprops = heap->arr[i];
ret = scan_dirty_idx_cb(c, lprops, 1, &data);
if (ret & LPT_SCAN_STOP)
goto found;
}
list_for_each_entry(lprops, &c->frdi_idx_list, list) {
ret = scan_dirty_idx_cb(c, lprops, 1, &data);
if (ret & LPT_SCAN_STOP)
goto found;
}
list_for_each_entry(lprops, &c->uncat_list, list) {
ret = scan_dirty_idx_cb(c, lprops, 1, &data);
if (ret & LPT_SCAN_STOP)
goto found;
}
if (c->pnodes_have >= c->pnode_cnt)
/* All pnodes are in memory, so skip scan */
return -ENOSPC;
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
(ubifs_lpt_scan_callback)scan_dirty_idx_cb,
&data);
if (err)
return err;
found:
ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
c->lscan_lnum = data.lnum;
lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
if (IS_ERR(lprops))
return PTR_ERR(lprops);
ubifs_assert(lprops->lnum == data.lnum);
ubifs_assert(lprops->free + lprops->dirty >= c->min_idx_node_sz);
ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
ubifs_assert((lprops->flags & LPROPS_INDEX));
dbg_find("found dirty LEB %d, free %d, dirty %d, flags %#x",
lprops->lnum, lprops->free, lprops->dirty, lprops->flags);
lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC,
lprops->flags | LPROPS_TAKEN, 0);
if (IS_ERR(lprops))
return PTR_ERR(lprops);
return lprops->lnum;
}
/**
* get_idx_gc_leb - try to get a LEB number from trivial GC.
* @c: the UBIFS file-system description object
*/
static int get_idx_gc_leb(struct ubifs_info *c)
{
	const struct ubifs_lprops *lprops;
	int lnum;

	/* A negative value from ubifs_get_idx_gc_leb() is passed on as is */
	lnum = ubifs_get_idx_gc_leb(c);
	if (lnum < 0)
		return lnum;

	/*
	 * This LEB was going to be unmapped once the commit finished, but the
	 * current commit needs it right away.
	 */
	lprops = ubifs_lpt_lookup_dirty(c, lnum);
	if (IS_ERR(lprops))
		return PTR_ERR(lprops);

	/* Flag it as an index LEB again */
	lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC,
				 lprops->flags | LPROPS_INDEX, -1);
	if (IS_ERR(lprops))
		return PTR_ERR(lprops);

	dbg_find("LEB %d, dirty %d and free %d flags %#x",
		 lprops->lnum, lprops->dirty, lprops->free, lprops->flags);
	return lnum;
}
/**
* find_dirtiest_idx_leb - find dirtiest index LEB from dirtiest array.
* @c: the UBIFS file-system description object
*/
static int find_dirtiest_idx_leb(struct ubifs_info *c)
{
	const struct ubifs_lprops *lprops;
	int lnum;

	/* Consume the saved array from its end, where the dirtiest entry is */
	while (c->dirty_idx.cnt) {
		/* Slots hold LEB numbers, not lprops pointers, by now */
		lnum = (size_t)c->dirty_idx.arr[--c->dirty_idx.cnt];

		lprops = ubifs_lpt_lookup(c, lnum);
		if (IS_ERR(lprops))
			return PTR_ERR(lprops);

		/* Skip LEBs that are taken or are no longer index LEBs */
		if ((lprops->flags & LPROPS_TAKEN) ||
		    !(lprops->flags & LPROPS_INDEX))
			continue;

		lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC,
					 lprops->flags | LPROPS_TAKEN, 0);
		if (IS_ERR(lprops))
			return PTR_ERR(lprops);

		dbg_find("LEB %d, dirty %d and free %d flags %#x", lprops->lnum,
			 lprops->dirty, lprops->free, lprops->flags);
		ubifs_assert(lprops->flags & LPROPS_TAKEN);
		ubifs_assert(lprops->flags & LPROPS_INDEX);
		return lnum;
	}

	/* The saved array is exhausted */
	return -ENOSPC;
}
/**
* ubifs_find_dirty_idx_leb - try to find dirtiest index LEB as at last commit.
* @c: the UBIFS file-system description object
*
* This function attempts to find an untaken index LEB with the most free and
* dirty space that can be used without overwriting index nodes that were in the
* last index committed.
*/
int ubifs_find_dirty_idx_leb(struct ubifs_info *c)
{
	int lnum;

	ubifs_get_lprops(c);

	/*
	 * Three sources are tried in turn, moving on only when the previous
	 * one reports -ENOSPC:
	 *   1. the array of dirtiest index LEB numbers saved at the start of
	 *      the last commit;
	 *   2. a scan of the entire LPT;
	 *   3. index LEBs awaiting trivial GC.
	 */
	lnum = find_dirtiest_idx_leb(c);
	if (lnum == -ENOSPC) {
		lnum = find_dirty_idx_leb(c);
		if (lnum == -ENOSPC)
			lnum = get_idx_gc_leb(c);
	}

	ubifs_release_lprops(c);
	return lnum;
}

984
fs/ubifs/gc.c Normal file
View file

@ -0,0 +1,984 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Adrian Hunter
* Artem Bityutskiy (Битюцкий Артём)
*/
/*
* This file implements garbage collection. The procedure for garbage collection
* is different depending on whether a LEB is an index LEB (contains index
* nodes) or not. For non-index LEBs, garbage collection finds a LEB which
* contains a lot of dirty space (obsolete nodes), and copies the non-obsolete
* nodes to the journal, at which point the garbage-collected LEB is free to be
* reused. For index LEBs, garbage collection marks the non-obsolete index nodes
* dirty in the TNC, and after the next commit, the garbage-collected LEB is
* to be reused. Garbage collection will cause the number of dirty index nodes
* to grow, however sufficient space is reserved for the index to ensure the
* commit will never run out of space.
*
* Notes about dead watermark. At current UBIFS implementation we assume that
* LEBs which have less than @c->dead_wm bytes of free + dirty space are full
* and not worth garbage-collecting. The dead watermark is one min. I/O unit
* size, or min. UBIFS node size, depending on what is greater. Indeed, UBIFS
* Garbage Collector has to synchronize the GC head's write buffer before
* returning, so this is about wasting one min. I/O unit. However, UBIFS GC can
* actually reclaim even very small pieces of dirty space by garbage collecting
* enough dirty LEBs, but we do not bother doing this at this implementation.
*
* Notes about dark watermark. The results of GC work depends on how big are
* the UBIFS nodes GC deals with. Large nodes make GC waste more space. Indeed,
* if GC move data from LEB A to LEB B and nodes in LEB A are large, GC would
* have to waste large pieces of free space at the end of LEB B, because nodes
* from LEB A would not fit. And the worst situation is when all nodes are of
* maximum size. So dark watermark is the amount of free + dirty space in LEB
* which are guaranteed to be reclaimable. If LEB has less space, the GC might
* be unable to reclaim it. So, LEBs with free + dirty greater than dark
* watermark are "good" LEBs from GC's point of view. The other LEBs are not so
* good, and GC takes extra care when moving them.
*/
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/list_sort.h>
#include "ubifs.h"
/*
* GC may need to move more than one LEB to make progress. The below constants
* define "soft" and "hard" limits on the number of LEBs the garbage collector
* may move.
*/
#define SOFT_LEBS_LIMIT 4
#define HARD_LEBS_LIMIT 32
/**
* switch_gc_head - switch the garbage collection journal head.
* @c: UBIFS file-system description object
*
* This function switches the GC head to the next LEB which is reserved in
* @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required,
* and other negative error code in case of failures.
*/
static int switch_gc_head(struct ubifs_info *c)
{
	struct ubifs_wbuf *gc_wbuf = &c->jheads[GCHD].wbuf;
	const int next_lnum = c->gc_lnum;
	int rc;

	ubifs_assert(next_lnum != -1);
	dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)",
	       gc_wbuf->lnum, gc_wbuf->offs + gc_wbuf->used, next_lnum,
	       c->leb_size - gc_wbuf->offs - gc_wbuf->used);

	/* Flush the GC write-buffer first */
	rc = ubifs_wbuf_sync_nolock(gc_wbuf);
	if (rc)
		return rc;

	/* With the write-buffer synchronized, 'c->gc_lnum' may be unmapped */
	rc = ubifs_leb_unmap(c, next_lnum);
	if (rc)
		return rc;

	rc = ubifs_wbuf_sync_nolock(gc_wbuf);
	if (rc)
		return rc;

	/* Add the new GC head LEB to the log as a bud starting at offset 0 */
	rc = ubifs_add_bud_to_log(c, GCHD, next_lnum, 0);
	if (rc)
		return rc;

	/* The reserved LEB is now in use, so nothing is reserved any more */
	c->gc_lnum = -1;
	return ubifs_wbuf_seek_nolock(gc_wbuf, next_lnum, 0);
}
/**
 * data_nodes_cmp - compare 2 data nodes.
 * @priv: UBIFS file-system description object
 * @a: first data node
 * @b: second data node
 *
 * This is a 'list_sort()' comparison callback. It orders data nodes by inode
 * number first and by block number second. Returns %0 if @a and @b are the
 * same list element, %1 if @a has greater inode or block number, and %-1
 * otherwise.
 */
static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct ubifs_info *c = priv;
	struct ubifs_scan_node *first, *second;
	ino_t ino_first, ino_second;
	unsigned int blk_first, blk_second;

	cond_resched();
	if (a == b)
		return 0;

	first = list_entry(a, struct ubifs_scan_node, list);
	second = list_entry(b, struct ubifs_scan_node, list);

	ubifs_assert(key_type(c, &first->key) == UBIFS_DATA_KEY);
	ubifs_assert(key_type(c, &second->key) == UBIFS_DATA_KEY);
	ubifs_assert(first->type == UBIFS_DATA_NODE);
	ubifs_assert(second->type == UBIFS_DATA_NODE);

	ino_first = key_inum(c, &first->key);
	ino_second = key_inum(c, &second->key);

	/* Different inodes - the inode number alone decides */
	if (ino_first != ino_second)
		return ino_first < ino_second ? -1 : 1;

	/* Same inode - fall back to the block number */
	blk_first = key_block(c, &first->key);
	blk_second = key_block(c, &second->key);
	return blk_first <= blk_second ? -1 : 1;
}
/**
 * nondata_nodes_cmp - compare 2 non-data nodes.
 * @priv: UBIFS file-system description object
 * @a: first node
 * @b: second node
 *
 * This is a 'list_sort()' comparison callback for nodes @a and @b. It makes
 * sure that inode nodes go first and are sorted by length in descending
 * order. Directory entry nodes go after inode nodes and are sorted by parent
 * inode number and then in ascending hash value order.
 */
static int nondata_nodes_cmp(void *priv, struct list_head *a,
			     struct list_head *b)
{
	struct ubifs_info *c = priv;
	struct ubifs_scan_node *first, *second;
	ino_t ino_first, ino_second;
	uint32_t hash_first, hash_second;

	cond_resched();
	if (a == b)
		return 0;

	first = list_entry(a, struct ubifs_scan_node, list);
	second = list_entry(b, struct ubifs_scan_node, list);

	ubifs_assert(key_type(c, &first->key) != UBIFS_DATA_KEY &&
		     key_type(c, &second->key) != UBIFS_DATA_KEY);
	ubifs_assert(first->type != UBIFS_DATA_NODE &&
		     second->type != UBIFS_DATA_NODE);

	/* Two inode nodes - the longer one goes first */
	if (first->type == UBIFS_INO_NODE && second->type == UBIFS_INO_NODE)
		return second->len - first->len;
	/* Inodes go before directory entries */
	if (first->type == UBIFS_INO_NODE)
		return -1;
	if (second->type == UBIFS_INO_NODE)
		return 1;

	/* Only directory/extended attribute entries are left at this point */
	ubifs_assert(key_type(c, &first->key) == UBIFS_DENT_KEY ||
		     key_type(c, &first->key) == UBIFS_XENT_KEY);
	ubifs_assert(key_type(c, &second->key) == UBIFS_DENT_KEY ||
		     key_type(c, &second->key) == UBIFS_XENT_KEY);
	ubifs_assert(first->type == UBIFS_DENT_NODE ||
		     first->type == UBIFS_XENT_NODE);
	ubifs_assert(second->type == UBIFS_DENT_NODE ||
		     second->type == UBIFS_XENT_NODE);

	ino_first = key_inum(c, &first->key);
	ino_second = key_inum(c, &second->key);
	if (ino_first != ino_second)
		return ino_first < ino_second ? -1 : 1;

	hash_first = key_hash(c, &first->key);
	hash_second = key_hash(c, &second->key);
	return hash_first <= hash_second ? -1 : 1;
}
/**
 * sort_nodes - sort nodes for GC.
 * @c: UBIFS file-system description object
 * @sleb: describes nodes to sort and contains the result on exit
 * @nondata: contains non-data nodes on exit
 * @min: minimum node size is returned here
 *
 * This function sorts the list of nodes to garbage collect. First of all, it
 * kills obsolete nodes (and nodes of any type other than inode, data,
 * directory entry or extended attribute entry) and separates data and
 * non-data nodes to the @sleb->nodes and @nondata lists correspondingly.
 *
 * Data nodes are then sorted in block number order - this is important for
 * bulk-read; data nodes with lower inode number go before data nodes with
 * higher inode number, and data nodes with lower block number go before data
 * nodes with higher block number;
 *
 * Non-data nodes are sorted as follows.
 *   o First go inode nodes - they are sorted in descending length order.
 *   o Then go directory entry nodes - they are sorted in hash order, which
 *     should supposedly optimize 'readdir()'. Direntry nodes with lower parent
 *     inode number go before direntry nodes with higher parent inode number,
 *     and direntry nodes with lower name hash values go before direntry nodes
 *     with higher name hash values.
 *
 * @min is left at %INT_MAX if no node survives. This function returns zero in
 * case of success and a negative error code in case of failure.
 */
static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
		      struct list_head *nondata, int *min)
{
	struct ubifs_scan_node *node, *next;
	int err;

	*min = INT_MAX;

	/* Separate data nodes and non-data nodes */
	list_for_each_entry_safe(node, next, &sleb->nodes, list) {
		/* Stays zero for nodes which have to be dropped */
		int live = 0;

		ubifs_assert(node->type == UBIFS_INO_NODE  ||
			     node->type == UBIFS_DATA_NODE ||
			     node->type == UBIFS_DENT_NODE ||
			     node->type == UBIFS_XENT_NODE ||
			     node->type == UBIFS_TRUN_NODE);

		/* Anything else is probably a truncation node, zap it */
		if (node->type == UBIFS_INO_NODE  ||
		    node->type == UBIFS_DATA_NODE ||
		    node->type == UBIFS_DENT_NODE ||
		    node->type == UBIFS_XENT_NODE) {
			ubifs_assert(key_type(c, &node->key) == UBIFS_DATA_KEY ||
				     key_type(c, &node->key) == UBIFS_INO_KEY  ||
				     key_type(c, &node->key) == UBIFS_DENT_KEY ||
				     key_type(c, &node->key) == UBIFS_XENT_KEY);

			/* Zero means the TNC does not refer to this node */
			live = ubifs_tnc_has_node(c, &node->key, 0, sleb->lnum,
						  node->offs, 0);
			if (live < 0)
				return live;
		}

		if (!live) {
			/* Unsupported or obsolete, remove it from the list */
			list_del(&node->list);
			kfree(node);
			continue;
		}

		if (node->len < *min)
			*min = node->len;

		if (key_type(c, &node->key) != UBIFS_DATA_KEY)
			list_move_tail(&node->list, nondata);
	}

	/* Sort data and non-data nodes */
	list_sort(c, &sleb->nodes, &data_nodes_cmp);
	list_sort(c, nondata, &nondata_nodes_cmp);

	/* Debugging checks of the resulting order */
	err = dbg_check_data_nodes_order(c, &sleb->nodes);
	if (!err)
		err = dbg_check_nondata_nodes_order(c, nondata);
	return err;
}
/**
 * move_node - move a node.
 * @c: UBIFS file-system description object
 * @sleb: describes the LEB to move nodes from
 * @snod: the node to move
 * @wbuf: write-buffer to move node to
 *
 * This function moves node @snod to @wbuf, changes TNC correspondingly, and
 * destroys @snod. If writing to @wbuf fails, @snod is left untouched on its
 * list. Returns zero in case of success and a negative error code in case of
 * failure.
 */
static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
		     struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf)
{
	/* Remember where the node is going to land before it is written */
	int dst_lnum = wbuf->lnum;
	int dst_offs = wbuf->offs + wbuf->used;
	int err;

	cond_resched();

	err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len);
	if (err)
		return err;

	/* The node was written, so it is destroyed whatever TNC says */
	err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, snod->offs,
				dst_lnum, dst_offs, snod->len);
	list_del(&snod->list);
	kfree(snod);

	return err;
}
/**
* move_nodes - move nodes.
* @c: UBIFS file-system description object
* @sleb: describes the LEB to move nodes from
*
* This function moves valid nodes from data LEB described by @sleb to the GC
* journal head. This function returns zero in case of success, %-EAGAIN if
* commit is required, and other negative error codes in case of other
* failures.
*
* The nodes are first split and sorted by 'sort_nodes()': data nodes stay on
* @sleb->nodes, all the other nodes go to a local list. Every pass of the main
* loop then writes as many nodes as fit into the current GC head LEB and, if
* some nodes are still left, switches the GC head to the next LEB. If an error
* happens after sorting, the nodes which are still on the local list are
* spliced back to @sleb->nodes before returning.
*/
static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
{
int err, min;
LIST_HEAD(nondata);
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
if (wbuf->lnum == -1) {
/*
* The GC journal head is not set, because it is the first GC
* invocation since mount.
*/
err = switch_gc_head(c);
if (err)
return err;
}
/* On success 'min' is the length of the smallest node which is kept */
err = sort_nodes(c, sleb, &nondata, &min);
if (err)
goto out;
/* Write nodes to their new location. Use the first-fit strategy */
while (1) {
int avail;
struct ubifs_scan_node *snod, *tmp;
/* Move data nodes */
list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
/* 'avail' is the space left in the current GC head LEB */
avail = c->leb_size - wbuf->offs - wbuf->used;
if (snod->len > avail)
/*
* Do not skip data nodes in order to optimize
* bulk-read.
*/
break;
err = move_node(c, sleb, snod, wbuf);
if (err)
goto out;
}
/* Move non-data nodes */
list_for_each_entry_safe(snod, tmp, &nondata, list) {
avail = c->leb_size - wbuf->offs - wbuf->used;
/* Not even the smallest node fits, stop filling this LEB */
if (avail < min)
break;
if (snod->len > avail) {
/*
* Keep going only if this is an inode with
* some data. Otherwise stop and switch the GC
* head. IOW, we assume that data-less inode
* nodes and direntry nodes are roughly of the
* same size.
*/
if (key_type(c, &snod->key) == UBIFS_DENT_KEY ||
snod->len == UBIFS_INO_NODE_SZ)
break;
continue;
}
err = move_node(c, sleb, snod, wbuf);
if (err)
goto out;
}
/* Both lists are empty - every node has been moved */
if (list_empty(&sleb->nodes) && list_empty(&nondata))
break;
/*
* Waste the rest of the space in the LEB and switch to the
* next LEB.
*/
err = switch_gc_head(c);
if (err)
goto out;
}
return 0;
out:
/*
* Put the non-data nodes back to @sleb->nodes, presumably so that they
* are released together with @sleb by the caller - TODO confirm.
*/
list_splice_tail(&nondata, &sleb->nodes);
return err;
}
/**
 * gc_sync_wbufs - sync write-buffers for GC.
 * @c: UBIFS file-system description object
 *
 * We must guarantee that obsoleting nodes are on flash. Unfortunately they may
 * be in a write-buffer instead. That is, a node could be written to a
 * write-buffer, obsoleting another node in a LEB that is GC'd. If that LEB is
 * erased before the write-buffer is sync'd and then there is an unclean
 * unmount, then an existing node is lost. To avoid this, we sync all
 * write-buffers, except for the write-buffer of the GC head itself.
 *
 * This function returns %0 on success or a negative error code on failure.
 */
static int gc_sync_wbufs(struct ubifs_info *c)
{
	int jhead;

	for (jhead = 0; jhead < c->jhead_cnt; jhead++) {
		int err;

		/* The GC head write-buffer is not synchronized here */
		if (jhead == GCHD)
			continue;

		err = ubifs_wbuf_sync(&c->jheads[jhead].wbuf);
		if (err)
			return err;
	}

	return 0;
}
/**
* ubifs_garbage_collect_leb - garbage-collect a logical eraseblock.
* @c: UBIFS file-system description object
* @lp: describes the LEB to garbage collect
*
* This function garbage-collects an LEB and returns one of the @LEB_FREED,
* @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is
* required, and other negative error codes in case of failures.
*
* Three cases are handled:
* o the LEB contains only free and dirty space - it is unmapped straight
* away;
* o the first node of the LEB is an index node - all its index nodes are
* marked dirty and the LEB is added to the @c->idx_gc list
* (%LEB_FREED_IDX);
* o otherwise it is a data LEB - its valid nodes are moved to the GC head.
*
* In the first and the last case the LEB becomes the new reserved GC LEB
* (@c->gc_lnum) and %LEB_RETAINED is returned if no LEB is reserved at the
* moment, otherwise %LEB_FREED is returned.
*/
int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
{
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
int err = 0, lnum = lp->lnum;
ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 ||
c->need_recovery);
ubifs_assert(c->gc_lnum != lnum);
ubifs_assert(wbuf->lnum != lnum);
if (lp->free + lp->dirty == c->leb_size) {
/* Special case - a free LEB */
dbg_gc("LEB %d is free, return it", lp->lnum);
ubifs_assert(!(lp->flags & LPROPS_INDEX));
if (lp->free != c->leb_size) {
/*
* Write buffers must be sync'd before unmapping
* freeable LEBs, because one of them may contain data
* which obsoletes something in 'lp->lnum'.
*/
err = gc_sync_wbufs(c);
if (err)
return err;
err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
0, 0, 0, 0);
if (err)
return err;
}
err = ubifs_leb_unmap(c, lp->lnum);
if (err)
return err;
/* No LEB is reserved for GC at the moment, keep this one */
if (c->gc_lnum == -1) {
c->gc_lnum = lnum;
return LEB_RETAINED;
}
return LEB_FREED;
}
/*
* We scan the entire LEB even though we only really need to scan up to
* (c->leb_size - lp->free).
*/
sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
ubifs_assert(!list_empty(&sleb->nodes));
/* The type of the first node decides how the whole LEB is treated */
snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
if (snod->type == UBIFS_IDX_NODE) {
struct ubifs_gced_idx_leb *idx_gc;
dbg_gc("indexing LEB %d (free %d, dirty %d)",
lnum, lp->free, lp->dirty);
/* Index nodes are not moved, they are only marked as dirty */
list_for_each_entry(snod, &sleb->nodes, list) {
struct ubifs_idx_node *idx = snod->node;
int level = le16_to_cpu(idx->level);
ubifs_assert(snod->type == UBIFS_IDX_NODE);
key_read(c, ubifs_idx_key(c, idx), &snod->key);
err = ubifs_dirty_idx_node(c, &snod->key, level, lnum,
snod->offs);
if (err)
goto out;
}
idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
if (!idx_gc) {
err = -ENOMEM;
goto out;
}
idx_gc->lnum = lnum;
/*
* 'unmap' is set to 1 by 'ubifs_gc_start_commit()', and only entries
* with 'unmap' set are unmapped by 'ubifs_gc_end_commit()'.
*/
idx_gc->unmap = 0;
list_add(&idx_gc->list, &c->idx_gc);
/*
* Don't release the LEB until after the next commit, because
* it may contain data which is needed for recovery. So
* although we freed this LEB, it will become usable only after
* the commit.
*/
err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0,
LPROPS_INDEX, 1);
if (err)
goto out;
err = LEB_FREED_IDX;
} else {
dbg_gc("data LEB %d (free %d, dirty %d)",
lnum, lp->free, lp->dirty);
err = move_nodes(c, sleb);
if (err)
goto out_inc_seq;
/* Sync the other journal heads before the LEB is declared free */
err = gc_sync_wbufs(c);
if (err)
goto out_inc_seq;
err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
if (err)
goto out_inc_seq;
/* Allow for races with TNC */
c->gced_lnum = lnum;
smp_wmb();
c->gc_seq += 1;
smp_wmb();
if (c->gc_lnum == -1) {
/* No LEB is reserved for GC at the moment, keep this one */
c->gc_lnum = lnum;
err = LEB_RETAINED;
} else {
err = ubifs_wbuf_sync_nolock(wbuf);
if (err)
goto out;
err = ubifs_leb_unmap(c, lnum);
if (err)
goto out;
err = LEB_FREED;
}
}
out:
/* 'err' is either a positive LEB_* code or a negative error code here */
ubifs_scan_destroy(sleb);
return err;
out_inc_seq:
/* We may have moved at least some nodes so allow for races with TNC */
c->gced_lnum = lnum;
smp_wmb();
c->gc_seq += 1;
smp_wmb();
goto out;
}
/**
 * ubifs_garbage_collect - UBIFS garbage collector.
 * @c: UBIFS file-system description object
 * @anyway: do GC even if there are free LEBs
 *
 * This function does out-of-place garbage collection. The return codes are:
 *   o positive LEB number if the LEB has been freed and may be used;
 *   o %-EAGAIN if the caller has to run commit;
 *   o %-ENOSPC if GC failed to make any progress;
 *   o other negative error codes in case of other errors.
 *
 * Garbage collector writes data to the journal when GC'ing data LEBs, and just
 * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point
 * commit may be required. But commit cannot be run from inside GC, because the
 * caller might be holding the commit lock, so %-EAGAIN is returned instead;
 * And this error code means that the caller has to run commit, and re-run GC
 * if there is still no free space.
 *
 * There are many reasons why this function may return %-EAGAIN:
 * o the log is full and there is no space to write an LEB reference for
 *   @c->gc_lnum;
 * o the journal is too large and exceeds size limitations;
 * o GC moved indexing LEBs, but they can be used only after the commit;
 * o the shrinker fails to find clean znodes to free and requests the commit;
 * o etc.
 *
 * Note, if the file-system is close to be full, this function may return
 * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of
 * the function. E.g., this happens if the limits on the journal size are too
 * tough and GC writes too much to the journal before an LEB is freed. This
 * might also mean that the journal is too large, and the TNC becomes too big,
 * so that the shrinker is constantly called, finds no clean znodes to free,
 * and requests commit. Well, this may also happen if the journal is all right,
 * but another kernel process consumes too much memory. Anyway, infinite
 * %-EAGAIN may happen, but in some extreme/misconfiguration cases.
 */
int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
{
	int i, err, ret, min_space = c->dead_wm;
	struct ubifs_lprops lp;
	struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;

	ubifs_assert_cmt_locked(c);
	ubifs_assert(!c->ro_media && !c->ro_mount);

	if (ubifs_gc_should_commit(c))
		return -EAGAIN;

	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);

	if (c->ro_error) {
		ret = -EROFS;
		goto out_unlock;
	}

	/* We expect the write-buffer to be empty on entry */
	ubifs_assert(!wbuf->used);

	for (i = 0; ; i++) {
		int space_before, space_after;

		/*
		 * No LEB has been picked by this iteration yet. The loop may
		 * be left before 'ubifs_find_dirty_leb()' is called, or after
		 * a previous iteration is done with its LEB, and the error
		 * path at 'out' must not return an LEB in those cases.
		 */
		lp.lnum = -1;

		cond_resched();

		/* Give the commit an opportunity to run */
		if (ubifs_gc_should_commit(c)) {
			ret = -EAGAIN;
			break;
		}

		if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) {
			/*
			 * We've done enough iterations. Indexing LEBs were
			 * moved and will be available after the commit.
			 */
			dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN");
			ubifs_commit_required(c);
			ret = -EAGAIN;
			break;
		}

		if (i > HARD_LEBS_LIMIT) {
			/*
			 * We've moved too many LEBs and have not made
			 * progress, give up.
			 */
			dbg_gc("hard limit, -ENOSPC");
			ret = -ENOSPC;
			break;
		}

		/*
		 * Empty and freeable LEBs can turn up while we waited for
		 * the wbuf lock, or while we have been running GC. In that
		 * case, we should just return one of those instead of
		 * continuing to GC dirty LEBs. Hence we request
		 * 'ubifs_find_dirty_leb()' to return an empty LEB if it can.
		 */
		ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1);
		if (ret) {
			if (ret == -ENOSPC)
				dbg_gc("no more dirty LEBs");
			break;
		}

		dbg_gc("found LEB %d: free %d, dirty %d, sum %d (min. space %d)",
		       lp.lnum, lp.free, lp.dirty, lp.free + lp.dirty,
		       min_space);

		space_before = c->leb_size - wbuf->offs - wbuf->used;
		if (wbuf->lnum == -1)
			space_before = 0;

		ret = ubifs_garbage_collect_leb(c, &lp);
		if (ret < 0) {
			if (ret == -EAGAIN) {
				/*
				 * This is not error, so we have to return the
				 * LEB to lprops. But if 'ubifs_return_leb()'
				 * fails, its failure code is propagated to the
				 * caller instead of the original '-EAGAIN'.
				 */
				err = ubifs_return_leb(c, lp.lnum);
				if (err)
					ret = err;
				/*
				 * The LEB has just been returned, make sure it
				 * is not returned once again at 'out' if the
				 * write-buffer synchronization below fails.
				 */
				lp.lnum = -1;
				break;
			}
			goto out;
		}

		if (ret == LEB_FREED) {
			/* An LEB has been freed and is ready for use */
			dbg_gc("LEB %d freed, return", lp.lnum);
			ret = lp.lnum;
			break;
		}

		if (ret == LEB_FREED_IDX) {
			/*
			 * This was an indexing LEB and it cannot be
			 * immediately used. And instead of requesting the
			 * commit straight away, we try to garbage collect some
			 * more.
			 */
			dbg_gc("indexing LEB %d freed, continue", lp.lnum);
			continue;
		}

		ubifs_assert(ret == LEB_RETAINED);
		space_after = c->leb_size - wbuf->offs - wbuf->used;
		dbg_gc("LEB %d retained, freed %d bytes", lp.lnum,
		       space_after - space_before);

		if (space_after > space_before) {
			/* GC makes progress, keep working */
			min_space >>= 1;
			if (min_space < c->dead_wm)
				min_space = c->dead_wm;
			continue;
		}

		dbg_gc("did not make progress");

		/*
		 * GC moved an LEB but have not done any progress. This means
		 * that the previous GC head LEB contained too few free space
		 * and the LEB which was GC'ed contained only large nodes which
		 * did not fit that space.
		 *
		 * We can do 2 things:
		 * 1. pick another LEB in a hope it'll contain a small node
		 *    which will fit the space we have at the end of current GC
		 *    head LEB, but there is no guarantee, so we try this out
		 *    unless we have already been working for too long;
		 * 2. request an LEB with more dirty space, which will force
		 *    'ubifs_find_dirty_leb()' to start scanning the lprops
		 *    table, instead of just picking one from the heap
		 *    (previously it already picked the dirtiest LEB).
		 */
		if (i < SOFT_LEBS_LIMIT) {
			dbg_gc("try again");
			continue;
		}

		min_space <<= 1;
		if (min_space > c->dark_wm)
			min_space = c->dark_wm;
		dbg_gc("set min. space to %d", min_space);
	}

	if (ret == -ENOSPC && !list_empty(&c->idx_gc)) {
		dbg_gc("no space, some index LEBs GC'ed, -EAGAIN");
		ubifs_commit_required(c);
		ret = -EAGAIN;
	}

	err = ubifs_wbuf_sync_nolock(wbuf);
	if (!err)
		err = ubifs_leb_unmap(c, c->gc_lnum);
	if (err) {
		ret = err;
		goto out;
	}
out_unlock:
	mutex_unlock(&wbuf->io_mutex);
	return ret;

out:
	ubifs_assert(ret < 0);
	ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
	ubifs_wbuf_sync_nolock(wbuf);
	ubifs_ro_mode(c, ret);
	mutex_unlock(&wbuf->io_mutex);
	/* Return the LEB only if the last iteration still holds one */
	if (lp.lnum != -1)
		ubifs_return_leb(c, lp.lnum);
	return ret;
}
/**
* ubifs_gc_start_commit - garbage collection at start of commit.
* @c: UBIFS file-system description object
*
* If a LEB has only dirty and free space, then we may safely unmap it and make
* it free. Note, we cannot do this with indexing LEBs because dirty space may
* correspond index nodes that are required for recovery. In that case, the
* LEB cannot be unmapped until after the next commit.
*
* The function works in three steps: freeable non-index LEBs are unmapped and
* marked as free; all entries which are already on the @c->idx_gc list are
* flagged for unmapping; and freeable index LEBs are marked as taken and added
* to the @c->idx_gc list, flagged for unmapping as well.
*
* This function returns %0 upon success and a negative error code upon failure.
*/
int ubifs_gc_start_commit(struct ubifs_info *c)
{
struct ubifs_gced_idx_leb *idx_gc;
const struct ubifs_lprops *lp;
int err = 0, flags;
/* Paired with 'ubifs_release_lprops()' at the 'out' label */
ubifs_get_lprops(c);
/*
* Unmap (non-index) freeable LEBs. Note that recovery requires that all
* wbufs are sync'd before this, which is done in 'do_commit()'.
*/
while (1) {
lp = ubifs_fast_find_freeable(c);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
/* 'NULL' means there are no more freeable LEBs */
if (!lp)
break;
ubifs_assert(!(lp->flags & LPROPS_TAKEN));
ubifs_assert(!(lp->flags & LPROPS_INDEX));
err = ubifs_leb_unmap(c, lp->lnum);
if (err)
goto out;
/* The LEB was unmapped, so all of it is free space now */
lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
ubifs_assert(!(lp->flags & LPROPS_TAKEN));
ubifs_assert(!(lp->flags & LPROPS_INDEX));
}
/* Mark GC'd index LEBs OK to unmap after this commit finishes */
list_for_each_entry(idx_gc, &c->idx_gc, list)
idx_gc->unmap = 1;
/* Record index freeable LEBs for unmapping after commit */
while (1) {
lp = ubifs_fast_find_frdi_idx(c);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
if (!lp)
break;
/* The list entry is allocated before lprops are changed */
idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
if (!idx_gc) {
err = -ENOMEM;
goto out;
}
ubifs_assert(!(lp->flags & LPROPS_TAKEN));
ubifs_assert(lp->flags & LPROPS_INDEX);
/* Don't release the LEB until after the next commit */
/*
* Set LPROPS_TAKEN and toggle LPROPS_INDEX. The latter is expected to
* be set (see the assertion above), so it ends up cleared.
*/
flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
/* The entry has not been added to the list yet, free it here */
kfree(idx_gc);
goto out;
}
ubifs_assert(lp->flags & LPROPS_TAKEN);
ubifs_assert(!(lp->flags & LPROPS_INDEX));
idx_gc->lnum = lp->lnum;
idx_gc->unmap = 1;
list_add(&idx_gc->list, &c->idx_gc);
}
out:
ubifs_release_lprops(c);
return err;
}
/**
* ubifs_gc_end_commit - garbage collection at end of commit.
* @c: UBIFS file-system description object
*
* This function completes out-of-place garbage collection of index LEBs.
*/
int ubifs_gc_end_commit(struct ubifs_info *c)
{
struct ubifs_gced_idx_leb *idx_gc, *tmp;
struct ubifs_wbuf *wbuf;
int err = 0;
wbuf = &c->jheads[GCHD].wbuf;
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list)
if (idx_gc->unmap) {
dbg_gc("LEB %d", idx_gc->lnum);
err = ubifs_leb_unmap(c, idx_gc->lnum);
if (err)
goto out;
err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC,
LPROPS_NC, 0, LPROPS_TAKEN, -1);
if (err)
goto out;
list_del(&idx_gc->list);
kfree(idx_gc);
}
out:
mutex_unlock(&wbuf->io_mutex);
return err;
}
/**
* ubifs_destroy_idx_gc - destroy idx_gc list.
* @c: UBIFS file-system description object
*
* This function destroys the @c->idx_gc list. It is called when unmounting
* so locks are not needed. Returns zero in case of success and a negative
* error code in case of failure.
*/
void ubifs_destroy_idx_gc(struct ubifs_info *c)
{
while (!list_empty(&c->idx_gc)) {
struct ubifs_gced_idx_leb *idx_gc;
idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb,
list);
c->idx_gc_cnt -= 1;
list_del(&idx_gc->list);
kfree(idx_gc);
}
}
/**
 * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list.
 * @c: UBIFS file-system description object
 *
 * This function removes the first entry of the @c->idx_gc list and returns
 * the LEB number it describes, or %-ENOSPC if the list is empty. Called
 * during start commit so locks are not needed.
 */
int ubifs_get_idx_gc_leb(struct ubifs_info *c)
{
	struct ubifs_gced_idx_leb *first;
	int lnum = -ENOSPC;

	if (!list_empty(&c->idx_gc)) {
		first = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb,
				   list);
		lnum = first->lnum;
		/* c->idx_gc_cnt is updated by the caller when lprops are updated */
		list_del(&first->list);
		kfree(first);
	}

	return lnum;
}

1150
fs/ubifs/io.c Normal file

File diff suppressed because it is too large Load diff

205
fs/ubifs/ioctl.c Normal file
View file

@ -0,0 +1,205 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
* Copyright (C) 2006, 2007 University of Szeged, Hungary
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Zoltan Sogor
* Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/* This file implements EXT2-compatible extended attribute ioctl() calls */
#include <linux/compat.h>
#include <linux/mount.h>
#include "ubifs.h"
/**
 * ubifs_set_inode_flags - set VFS inode flags.
 * @inode: VFS inode to set flags for
 *
 * This function propagates flags from UBIFS inode object to VFS inode object.
 * Only the %S_SYNC, %S_APPEND, %S_IMMUTABLE and %S_DIRSYNC bits of
 * @inode->i_flags are affected: they are cleared first and then set again
 * according to the corresponding UBIFS inode flags.
 */
void ubifs_set_inode_flags(struct inode *inode)
{
	unsigned int flags = ubifs_inode(inode)->flags;
	unsigned int vfs_flags = 0;

	/* Translate UBIFS flags to their VFS counterparts */
	vfs_flags |= (flags & UBIFS_SYNC_FL) ? S_SYNC : 0;
	vfs_flags |= (flags & UBIFS_APPEND_FL) ? S_APPEND : 0;
	vfs_flags |= (flags & UBIFS_IMMUTABLE_FL) ? S_IMMUTABLE : 0;
	vfs_flags |= (flags & UBIFS_DIRSYNC_FL) ? S_DIRSYNC : 0;

	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC);
	inode->i_flags |= vfs_flags;
}
/*
 * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags.
 * @ioctl_flags: flags to convert
 *
 * This function converts ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags
 * (@UBIFS_COMPR_FL, etc). Only the compression, sync, append, immutable and
 * dirsync flags are translated, all the other bits are dropped.
 */
static int ioctl2ubifs(int ioctl_flags)
{
	int ubifs_flags = 0;

	ubifs_flags |= (ioctl_flags & FS_COMPR_FL) ? UBIFS_COMPR_FL : 0;
	ubifs_flags |= (ioctl_flags & FS_SYNC_FL) ? UBIFS_SYNC_FL : 0;
	ubifs_flags |= (ioctl_flags & FS_APPEND_FL) ? UBIFS_APPEND_FL : 0;
	ubifs_flags |= (ioctl_flags & FS_IMMUTABLE_FL) ? UBIFS_IMMUTABLE_FL : 0;
	ubifs_flags |= (ioctl_flags & FS_DIRSYNC_FL) ? UBIFS_DIRSYNC_FL : 0;

	return ubifs_flags;
}
/*
 * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags.
 * @ubifs_flags: flags to convert
 *
 * This function converts UBIFS inode flags (@UBIFS_COMPR_FL, etc) to ioctl
 * flags (@FS_COMPR_FL, etc). Only the compression, sync, append, immutable and
 * dirsync flags are translated, all the other bits are dropped.
 */
static int ubifs2ioctl(int ubifs_flags)
{
	int ioctl_flags = 0;

	ioctl_flags |= (ubifs_flags & UBIFS_COMPR_FL) ? FS_COMPR_FL : 0;
	ioctl_flags |= (ubifs_flags & UBIFS_SYNC_FL) ? FS_SYNC_FL : 0;
	ioctl_flags |= (ubifs_flags & UBIFS_APPEND_FL) ? FS_APPEND_FL : 0;
	ioctl_flags |= (ubifs_flags & UBIFS_IMMUTABLE_FL) ? FS_IMMUTABLE_FL : 0;
	ioctl_flags |= (ubifs_flags & UBIFS_DIRSYNC_FL) ? FS_DIRSYNC_FL : 0;

	return ioctl_flags;
}
/*
* setflags - change inode flags on behalf of the %FS_IOC_SETFLAGS ioctl.
* @inode: inode to change flags for
* @flags: new flags in the ioctl format (@FS_COMPR_FL, etc)
*
* This function budgets space for making the inode dirty, replaces the UBIFS
* inode flags with the converted @flags under the inode mutex, propagates
* them to the VFS inode, updates the inode change time and marks the inode as
* dirty. If the inode is synchronous, it is written back straight away.
*
* Returns zero in case of success, %-EPERM if the append-only or immutable
* flag is being changed without the %CAP_LINUX_IMMUTABLE capability, and other
* negative error codes in case of budgeting or write-back failures.
*/
static int setflags(struct inode *inode, int flags)
{
int oldflags, err, release;
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_budget_req req = { .dirtied_ino = 1,
.dirtied_ino_d = ui->data_len };
err = ubifs_budget_space(c, &req);
if (err)
return err;
/*
* The IMMUTABLE and APPEND_ONLY flags can only be changed by
* the relevant capability.
*/
mutex_lock(&ui->ui_mutex);
oldflags = ubifs2ioctl(ui->flags);
/* The XOR leaves only those bits set which differ from the old flags */
if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
err = -EPERM;
goto out_unlock;
}
}
ui->flags = ioctl2ubifs(flags);
ubifs_set_inode_flags(inode);
inode->i_ctime = ubifs_current_time(inode);
/*
* Sample the dirty state before the inode is marked as dirty below. If
* the inode was already dirty, the budget taken above is released -
* presumably because a dirty inode already holds its budget (TODO
* confirm against the budgeting code).
*/
release = ui->dirty;
mark_inode_dirty_sync(inode);
mutex_unlock(&ui->ui_mutex);
if (release)
ubifs_release_budget(c, &req);
if (IS_SYNC(inode))
err = write_inode_now(inode, 1);
return err;
out_unlock:
/* Nothing has been changed, so the budget is always given back */
ubifs_err("can't modify inode %lu attributes", inode->i_ino);
mutex_unlock(&ui->ui_mutex);
ubifs_release_budget(c, &req);
return err;
}
/**
 * ubifs_ioctl - handle the inode flags ioctl commands.
 * @file: file the ioctl was issued on
 * @cmd: ioctl command, %FS_IOC_GETFLAGS and %FS_IOC_SETFLAGS are supported
 * @arg: user-space pointer to an integer which carries the flags
 *
 * Returns zero in case of success, %-ENOTTY for unsupported commands, and
 * other negative error codes in case of failures.
 */
long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file_inode(file);
	int flags, err;

	if (cmd == FS_IOC_GETFLAGS) {
		flags = ubifs2ioctl(ubifs_inode(inode)->flags);

		dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags);
		return put_user(flags, (int __user *) arg);
	}

	if (cmd != FS_IOC_SETFLAGS)
		return -ENOTTY;

	if (IS_RDONLY(inode))
		return -EROFS;

	if (!inode_owner_or_capable(inode))
		return -EACCES;

	if (get_user(flags, (int __user *) arg))
		return -EFAULT;

	/* The dirsync flag is silently dropped for non-directories */
	if (!S_ISDIR(inode->i_mode))
		flags &= ~FS_DIRSYNC_FL;

	/*
	 * Make sure the file-system is read-write and make sure it
	 * will not become read-only while we are changing the flags.
	 */
	err = mnt_want_write_file(file);
	if (err)
		return err;

	dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags);
	err = setflags(inode, flags);
	mnt_drop_write_file(file);

	return err;
}
#ifdef CONFIG_COMPAT
/**
 * ubifs_compat_ioctl - handle 32-bit compatibility ioctl commands.
 * @file: file the ioctl was issued on
 * @cmd: compat ioctl command
 * @arg: compat user-space pointer to the ioctl argument
 *
 * This function translates %FS_IOC32_GETFLAGS and %FS_IOC32_SETFLAGS to their
 * native counterparts and forwards them to 'ubifs_ioctl()'. Returns
 * %-ENOIOCTLCMD for any other command.
 */
long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	if (cmd == FS_IOC32_GETFLAGS)
		cmd = FS_IOC_GETFLAGS;
	else if (cmd == FS_IOC32_SETFLAGS)
		cmd = FS_IOC_SETFLAGS;
	else
		return -ENOIOCTLCMD;

	return ubifs_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
}
#endif

1460
fs/ubifs/journal.c Normal file

File diff suppressed because it is too large Load diff

548
fs/ubifs/key.h Normal file
View file

@ -0,0 +1,548 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/*
* This header contains various key-related definitions and helper functions.
* UBIFS allows several key schemes, so we access key fields only via these
* helpers. At the moment only one key scheme is supported.
*
* Simple key scheme
* ~~~~~~~~~~~~~~~~~
*
* Keys are 64-bits long. First 32-bits are inode number (parent inode number
* in case of direntry key). Next 3 bits are node type. The last 29 bits are
* 4KiB offset in case of inode node, and direntry hash in case of a direntry
* node. We use "r5" hash borrowed from reiserfs.
*/
#ifndef __UBIFS_KEY_H__
#define __UBIFS_KEY_H__
/**
 * key_mask_hash - mask a valid hash value.
 * @hash: value to be masked
 *
 * We use hash values as offset in directories, so values %0 and %1 are
 * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This
 * function makes sure the reserved values are not used: after masking, values
 * %0, %1 and %2 are mapped to %3, %4 and %5 correspondingly.
 */
static inline uint32_t key_mask_hash(uint32_t hash)
{
	uint32_t masked = hash & UBIFS_S_KEY_HASH_MASK;

	return unlikely(masked <= 2) ? masked + 3 : masked;
}
/**
 * key_r5_hash - R5 hash function (borrowed from reiserfs).
 * @s: direntry name
 * @len: name length
 *
 * The hash is calculated over exactly @len bytes of @s, so @s does not have
 * to be NUL-terminated and nothing is read beyond the name. For a
 * NUL-terminated name whose length is @len the result is the same as if the
 * name was hashed up to its terminating NUL. A non-positive @len hashes
 * nothing. The result is passed through 'key_mask_hash()', so reserved hash
 * values are never returned.
 */
static inline uint32_t key_r5_hash(const char *s, int len)
{
	uint32_t a = 0;
	/* Bytes are deliberately treated as signed, this is part of the hash */
	const signed char *str = (const signed char *)s;

	while (len-- > 0) {
		a += *str << 4;
		a += *str >> 4;
		a *= 11;
		str++;
	}

	return key_mask_hash(a);
}
/**
 * key_test_hash - testing hash function.
 * @str: direntry name
 * @len: name length
 *
 * The hash is made of at most the first 4 bytes of @str, copied as they are
 * and passed through 'key_mask_hash()'.
 */
static inline uint32_t key_test_hash(const char *str, int len)
{
	uint32_t word = 0;
	/* Never copy more bytes than 'word' can hold */
	uint32_t nbytes = min_t(uint32_t, len, 4);

	memcpy(&word, str, nbytes);
	return key_mask_hash(word);
}
/**
 * ino_key_init - initialize inode key.
 * @c: UBIFS file-system description object
 * @key: key to initialize
 * @inum: inode number
 *
 * The first 32-bit word of @key is set to @inum, and the second one to the
 * inode key type placed above the block number bits.
 */
static inline void ino_key_init(const struct ubifs_info *c,
				union ubifs_key *key, ino_t inum)
{
	const uint32_t type_bits = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS;

	key->u32[0] = inum;
	key->u32[1] = type_bits;
}
/**
 * ino_key_init_flash - initialize on-flash inode key.
 * @c: UBIFS file-system description object
 * @k: key to initialize
 * @inum: inode number
 *
 * The first 8 bytes of @k are filled with the little-endian key words, and
 * the rest of the key, up to %UBIFS_MAX_KEY_LEN bytes, is zeroed.
 */
static inline void ino_key_init_flash(const struct ubifs_info *c, void *k,
				      ino_t inum)
{
	union ubifs_key *key = k;
	const uint32_t type_bits = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS;

	key->j32[0] = cpu_to_le32(inum);
	key->j32[1] = cpu_to_le32(type_bits);
	memset((char *)k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
}
/**
 * lowest_ino_key - get the lowest possible inode key.
 * @c: UBIFS file-system description object
 * @key: key to initialize
 * @inum: inode number
 *
 * The second 32-bit word of @key is zeroed, the first one is set to @inum.
 */
static inline void lowest_ino_key(const struct ubifs_info *c,
				  union ubifs_key *key, ino_t inum)
{
	const uint32_t lowest = 0;

	key->u32[0] = inum;
	key->u32[1] = lowest;
}
/**
 * highest_ino_key - get the highest possible inode key.
 * @c: UBIFS file-system description object
 * @key: key to initialize
 * @inum: inode number
 *
 * All bits of the second 32-bit word of @key are set, the first word is set
 * to @inum.
 */
static inline void highest_ino_key(const struct ubifs_info *c,
				   union ubifs_key *key, ino_t inum)
{
	const uint32_t highest = 0xffffffff;

	key->u32[0] = inum;
	key->u32[1] = highest;
}
/**
 * dent_key_init - initialize directory entry key.
 * @c: UBIFS file-system description object
 * @key: key to initialize
 * @inum: parent inode number
 * @nm: direntry name and length
 *
 * The name hash is calculated with the @c->key_hash function and combined
 * with the directory entry key type in the second 32-bit word of @key.
 */
static inline void dent_key_init(const struct ubifs_info *c,
				 union ubifs_key *key, ino_t inum,
				 const struct qstr *nm)
{
	const uint32_t type_bits = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS;
	uint32_t hash = c->key_hash(nm->name, nm->len);

	/* The hash must fit the bits which are reserved for it */
	ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
	key->u32[0] = inum;
	key->u32[1] = hash | type_bits;
}
/**
 * dent_key_init_hash - set up a directory entry key from a ready-made hash.
 * @c: UBIFS file-system description object
 * @key: key to initialize
 * @inum: parent inode number
 * @hash: direntry name hash
 *
 * Same result as 'dent_key_init()' produces, except that the caller supplies
 * the name hash instead of the name.
 */
static inline void dent_key_init_hash(const struct ubifs_info *c,
				      union ubifs_key *key, ino_t inum,
				      uint32_t hash)
{
	/* The hash must not spill into the bits which hold the key type */
	ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));

	key->u32[0] = inum;
	key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS);
}
/**
 * dent_key_init_flash - set up a directory entry key in on-flash format.
 * @c: UBIFS file-system description object
 * @k: key to initialize
 * @inum: parent inode number
 * @nm: direntry name and length
 *
 * Both key words are stored little-endian; the remainder of the
 * %UBIFS_MAX_KEY_LEN bytes is zeroed.
 */
static inline void dent_key_init_flash(const struct ubifs_info *c, void *k,
				       ino_t inum, const struct qstr *nm)
{
	union ubifs_key *flash_key = k;
	uint32_t name_hash = c->key_hash(nm->name, nm->len);

	/* The hash must not spill into the bits which hold the key type */
	ubifs_assert(!(name_hash & ~UBIFS_S_KEY_HASH_MASK));

	flash_key->j32[0] = cpu_to_le32(inum);
	flash_key->j32[1] = cpu_to_le32(name_hash |
					(UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS));
	/* Clear everything which follows the two 32-bit key words */
	memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
}
/**
 * lowest_dent_key - build the smallest directory entry key of a directory.
 * @c: UBIFS file-system description object
 * @key: where to store the lowest key
 * @inum: parent inode number
 *
 * The second key word carries the directory entry key type and a zero hash.
 */
static inline void lowest_dent_key(const struct ubifs_info *c,
				   union ubifs_key *key, ino_t inum)
{
	const uint32_t type_word = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS;

	key->u32[0] = inum;
	key->u32[1] = type_word;
}
/**
 * xent_key_init - set up an in-memory extended attribute entry key.
 * @c: UBIFS file-system description object
 * @key: key to initialize
 * @inum: host inode number
 * @nm: extended attribute entry name and length
 *
 * The name is hashed with 'c->key_hash()' and the hash is combined with the
 * extended attribute entry key type in the second key word.
 */
static inline void xent_key_init(const struct ubifs_info *c,
				 union ubifs_key *key, ino_t inum,
				 const struct qstr *nm)
{
	uint32_t name_hash = c->key_hash(nm->name, nm->len);

	/* The hash must not spill into the bits which hold the key type */
	ubifs_assert(!(name_hash & ~UBIFS_S_KEY_HASH_MASK));

	key->u32[0] = inum;
	key->u32[1] = name_hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
}
/**
 * xent_key_init_flash - set up an extended attribute entry key in on-flash
 *                       format.
 * @c: UBIFS file-system description object
 * @k: key to initialize
 * @inum: host inode number
 * @nm: extended attribute entry name and length
 *
 * Both key words are stored little-endian; the remainder of the
 * %UBIFS_MAX_KEY_LEN bytes is zeroed.
 */
static inline void xent_key_init_flash(const struct ubifs_info *c, void *k,
				       ino_t inum, const struct qstr *nm)
{
	union ubifs_key *flash_key = k;
	uint32_t name_hash = c->key_hash(nm->name, nm->len);

	/* The hash must not spill into the bits which hold the key type */
	ubifs_assert(!(name_hash & ~UBIFS_S_KEY_HASH_MASK));

	flash_key->j32[0] = cpu_to_le32(inum);
	flash_key->j32[1] = cpu_to_le32(name_hash |
					(UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS));
	/* Clear everything which follows the two 32-bit key words */
	memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
}
/**
 * lowest_xent_key - build the smallest extended attribute entry key of a host
 *                   inode.
 * @c: UBIFS file-system description object
 * @key: where to store the lowest key
 * @inum: host inode number
 *
 * The second key word carries the extended attribute entry key type and a
 * zero hash.
 */
static inline void lowest_xent_key(const struct ubifs_info *c,
				   union ubifs_key *key, ino_t inum)
{
	const uint32_t type_word = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS;

	key->u32[0] = inum;
	key->u32[1] = type_word;
}
/**
 * data_key_init - set up an in-memory data key.
 * @c: UBIFS file-system description object
 * @key: key to initialize
 * @inum: inode number
 * @block: block number
 *
 * The block number is combined with the data key type in the second key word.
 */
static inline void data_key_init(const struct ubifs_info *c,
				 union ubifs_key *key, ino_t inum,
				 unsigned int block)
{
	/* The block number must not spill into the bits of the key type */
	ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));

	key->u32[0] = inum;
	key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS);
}
/**
 * highest_data_key - build the largest data key which belongs to an inode.
 * @c: UBIFS file-system description object
 * @key: key to initialize
 * @inum: inode number
 *
 * This is a data key whose block number is %UBIFS_S_KEY_BLOCK_MASK.
 */
static inline void highest_data_key(const struct ubifs_info *c,
				    union ubifs_key *key, ino_t inum)
{
	const unsigned int last_block = UBIFS_S_KEY_BLOCK_MASK;

	data_key_init(c, key, inum, last_block);
}
/**
 * trun_key_init - set up a truncation node key.
 * @c: UBIFS file-system description object
 * @key: key to initialize
 * @inum: inode number
 *
 * Note, UBIFS does not have truncation keys on the media and this function is
 * only used for purposes of replay.
 */
static inline void trun_key_init(const struct ubifs_info *c,
				 union ubifs_key *key, ino_t inum)
{
	const uint32_t type_word = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS;

	key->u32[0] = inum;
	key->u32[1] = type_word;
}
/**
 * invalid_key_init - mark a key object as invalid.
 * @c: UBIFS file-system description object
 * @key: key to initialize
 *
 * The first key word is filled with a recognizable marker value and the
 * second one with %UBIFS_INVALID_KEY.
 */
static inline void invalid_key_init(const struct ubifs_info *c,
				    union ubifs_key *key)
{
	const uint32_t marker = 0xDEADBEAF;

	key->u32[0] = marker;
	key->u32[1] = UBIFS_INVALID_KEY;
}
/**
 * key_type - extract the type of an in-memory key.
 * @c: UBIFS file-system description object
 * @key: key to get type of
 *
 * The type lives in the bits of the second key word which are above the
 * block bits.
 */
static inline int key_type(const struct ubifs_info *c,
			   const union ubifs_key *key)
{
	const uint32_t word = key->u32[1];

	return word >> UBIFS_S_KEY_BLOCK_BITS;
}
/**
 * key_type_flash - extract the type of an on-flash formatted key.
 * @c: UBIFS file-system description object
 * @k: key to get type of
 *
 * The second key word is converted from little-endian before the type bits
 * are extracted.
 */
static inline int key_type_flash(const struct ubifs_info *c, const void *k)
{
	const union ubifs_key *flash_key = k;
	const uint32_t word = le32_to_cpu(flash_key->j32[1]);

	return word >> UBIFS_S_KEY_BLOCK_BITS;
}
/**
 * key_inum - read the inode number stored in an in-memory key.
 * @c: UBIFS file-system description object
 * @k: key to fetch inode number from
 *
 * The inode number is the first key word.
 */
static inline ino_t key_inum(const struct ubifs_info *c, const void *k)
{
	return ((const union ubifs_key *)k)->u32[0];
}
/**
 * key_inum_flash - read the inode number stored in an on-flash formatted key.
 * @c: UBIFS file-system description object
 * @k: key to fetch inode number from
 *
 * The inode number is the first key word, stored little-endian.
 */
static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k)
{
	return le32_to_cpu(((const union ubifs_key *)k)->j32[0]);
}
/**
 * key_hash - extract the name hash from an in-memory key.
 * @c: UBIFS file-system description object
 * @key: the key to get hash from
 *
 * The hash is the part of the second key word selected by
 * %UBIFS_S_KEY_HASH_MASK.
 */
static inline uint32_t key_hash(const struct ubifs_info *c,
				const union ubifs_key *key)
{
	const uint32_t word = key->u32[1];

	return word & UBIFS_S_KEY_HASH_MASK;
}
/**
 * key_hash_flash - extract the name hash from an on-flash formatted key.
 * @c: UBIFS file-system description object
 * @k: the key to get hash from
 *
 * The second key word is converted from little-endian before it is masked
 * with %UBIFS_S_KEY_HASH_MASK.
 */
static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k)
{
	const union ubifs_key *flash_key = k;
	const uint32_t word = le32_to_cpu(flash_key->j32[1]);

	return word & UBIFS_S_KEY_HASH_MASK;
}
/**
 * key_block - extract the data block number from an in-memory key.
 * @c: UBIFS file-system description object
 * @key: the key to get the block number from
 *
 * The block number is the part of the second key word selected by
 * %UBIFS_S_KEY_BLOCK_MASK.
 */
static inline unsigned int key_block(const struct ubifs_info *c,
				     const union ubifs_key *key)
{
	const uint32_t word = key->u32[1];

	return word & UBIFS_S_KEY_BLOCK_MASK;
}
/**
 * key_block_flash - extract the data block number from an on-flash formatted
 *                   key.
 * @c: UBIFS file-system description object
 * @k: the key to get the block number from
 *
 * The second key word is converted from little-endian before it is masked
 * with %UBIFS_S_KEY_BLOCK_MASK.
 */
static inline unsigned int key_block_flash(const struct ubifs_info *c,
					   const void *k)
{
	const union ubifs_key *flash_key = k;
	const uint32_t word = le32_to_cpu(flash_key->j32[1]);

	return word & UBIFS_S_KEY_BLOCK_MASK;
}
/**
 * key_read - convert an on-flash key to the in-memory format.
 * @c: UBIFS file-system description object
 * @from: the key to transform
 * @to: the key to store the result
 *
 * Both 32-bit key words are converted from little-endian to CPU byte order.
 */
static inline void key_read(const struct ubifs_info *c, const void *from,
			    union ubifs_key *to)
{
	const union ubifs_key *flash_key = from;
	int w;

	for (w = 0; w < 2; w++)
		to->u32[w] = le32_to_cpu(flash_key->j32[w]);
}
/**
 * key_write - convert an in-memory key to the on-flash format.
 * @c: UBIFS file-system description object
 * @from: the key to transform
 * @to: the key to store the result
 *
 * Both 32-bit key words are stored little-endian and the remainder of the
 * %UBIFS_MAX_KEY_LEN bytes at @to is zeroed.
 */
static inline void key_write(const struct ubifs_info *c,
			     const union ubifs_key *from, void *to)
{
	union ubifs_key *flash_key = to;
	int w;

	for (w = 0; w < 2; w++)
		flash_key->j32[w] = cpu_to_le32(from->u32[w]);
	/* Clear everything which follows the two 32-bit key words */
	memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8);
}
/**
 * key_write_idx - convert an in-memory key to the format used in the index.
 * @c: UBIFS file-system description object
 * @from: the key to transform
 * @to: the key to store the result
 *
 * Only the two 32-bit key words are written (little-endian); unlike
 * 'key_write()', nothing beyond them is touched.
 */
static inline void key_write_idx(const struct ubifs_info *c,
				 const union ubifs_key *from, void *to)
{
	union ubifs_key *idx_key = to;
	int w;

	for (w = 0; w < 2; w++)
		idx_key->j32[w] = cpu_to_le32(from->u32[w]);
}
/**
 * key_copy - duplicate a key.
 * @c: UBIFS file-system description object
 * @from: the key to copy from
 * @to: the key to copy to
 *
 * The key is copied as a single 64-bit quantity.
 */
static inline void key_copy(const struct ubifs_info *c,
			    const union ubifs_key *from, union ubifs_key *to)
{
	*to->u64 = *from->u64;
}
/**
 * keys_cmp - three-way comparison of two keys.
 * @c: UBIFS file-system description object
 * @key1: the first key to compare
 * @key2: the second key to compare
 *
 * The keys are compared word by word, the first word being the most
 * significant one. Returns %-1 if @key1 is less than @key2, %0 if the keys
 * are equivalent and %1 if @key1 is greater than @key2.
 */
static inline int keys_cmp(const struct ubifs_info *c,
			   const union ubifs_key *key1,
			   const union ubifs_key *key2)
{
	int w;

	for (w = 0; w < 2; w++) {
		if (key1->u32[w] < key2->u32[w])
			return -1;
		if (key1->u32[w] > key2->u32[w])
			return 1;
	}

	return 0;
}
/**
 * keys_eq - check whether two keys are equivalent.
 * @c: UBIFS file-system description object
 * @key1: the first key to compare
 * @key2: the second key to compare
 *
 * Returns %1 if both 32-bit words of @key1 match those of @key2 and %0
 * otherwise.
 */
static inline int keys_eq(const struct ubifs_info *c,
			  const union ubifs_key *key1,
			  const union ubifs_key *key2)
{
	return key1->u32[0] == key2->u32[0] &&
	       key1->u32[1] == key2->u32[1];
}
/**
 * is_hash_key - check whether a key is built from a name hash.
 * @c: UBIFS file-system description object
 * @key: key
 *
 * Directory entry and extended attribute entry keys are hashed keys and thus
 * vulnerable to hash collisions. Returns %1 for such keys and %0 otherwise.
 */
static inline int is_hash_key(const struct ubifs_info *c,
			      const union ubifs_key *key)
{
	const int type = key_type(c, key);

	if (type == UBIFS_DENT_KEY)
		return 1;
	return type == UBIFS_XENT_KEY;
}
/**
 * key_max_inode_size - largest file size the current key format can address.
 * @c: UBIFS file-system description object
 *
 * For the simple key format this is the number of addressable blocks
 * multiplied by %UBIFS_BLOCK_SIZE. Zero is returned for any other key format.
 */
static inline unsigned long long key_max_inode_size(const struct ubifs_info *c)
{
	if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT)
		return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE;

	return 0;
}
#endif /* !__UBIFS_KEY_H__ */

753
fs/ubifs/log.c Normal file
View file

@ -0,0 +1,753 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/*
* This file is a part of UBIFS journal implementation and contains various
* functions which manipulate the log. The log is a fixed area on the flash
* which does not contain any data but refers to buds. The log is a part of the
* journal.
*/
#include "ubifs.h"
static int dbg_check_bud_bytes(struct ubifs_info *c);
/**
 * ubifs_search_bud - look up a bud by LEB number.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number to search
 *
 * The buds RB-tree is walked under 'c->buds_lock'. Returns the bud description
 * object of LEB @lnum, or %NULL if there is no bud with this LEB number.
 */
struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum)
{
	struct ubifs_bud *found = NULL;
	struct rb_node *node;

	spin_lock(&c->buds_lock);
	for (node = c->buds.rb_node; node; ) {
		struct ubifs_bud *cur = rb_entry(node, struct ubifs_bud, rb);

		if (lnum == cur->lnum) {
			found = cur;
			break;
		}
		node = lnum < cur->lnum ? node->rb_left : node->rb_right;
	}
	spin_unlock(&c->buds_lock);

	return found;
}
/**
 * ubifs_get_wbuf - find the write-buffer which serves a LEB.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number to search
 *
 * Returns the write-buffer of the journal head which the bud of LEB @lnum
 * belongs to, or %NULL if there are no journal heads or no such bud.
 */
struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
{
	struct rb_node *node;
	int jhead = 0, hit = 0;

	if (!c->jheads)
		return NULL;

	spin_lock(&c->buds_lock);
	for (node = c->buds.rb_node; node; ) {
		struct ubifs_bud *cur = rb_entry(node, struct ubifs_bud, rb);

		if (lnum == cur->lnum) {
			/* Remember the journal head before dropping the lock */
			jhead = cur->jhead;
			hit = 1;
			break;
		}
		node = lnum < cur->lnum ? node->rb_left : node->rb_right;
	}
	spin_unlock(&c->buds_lock);

	if (!hit)
		return NULL;
	return &c->jheads[jhead].wbuf;
}
/**
 * empty_log_bytes - calculate how many bytes of the log are still empty.
 * @c: UBIFS file-system description object
 *
 * The head and tail positions are turned into byte offsets ('LEB number *
 * LEB size', plus the head offset for the head) and then compared.
 */
static inline long long empty_log_bytes(const struct ubifs_info *c)
{
	const long long head = (long long)c->lhead_lnum * c->leb_size +
			       c->lhead_offs;
	const long long tail = (long long)c->ltail_lnum * c->leb_size;

	if (head > tail)
		return c->log_bytes - head + tail;
	if (head < tail)
		return tail - head;

	/*
	 * Equal byte positions: if the LEB numbers nevertheless differ, no
	 * space is reported, otherwise the whole log is reported as empty.
	 */
	if (c->lhead_lnum != c->ltail_lnum)
		return 0;
	return c->log_bytes;
}
/**
 * ubifs_add_bud - insert a bud into the buds tree and its journal head list.
 * @c: UBIFS file-system description object
 * @bud: the bud to add
 *
 * The bud is linked into the 'c->buds' RB-tree (ordered by LEB number) and,
 * when journal heads exist, appended to the buds list of its journal head.
 * 'c->bud_bytes' is increased by the space from the bud start to the end of
 * the LEB. All of this is done under 'c->buds_lock'.
 */
void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
{
	struct rb_node **link, *parent = NULL;

	spin_lock(&c->buds_lock);

	link = &c->buds.rb_node;
	while (*link) {
		struct ubifs_bud *cur;

		parent = *link;
		cur = rb_entry(parent, struct ubifs_bud, rb);
		ubifs_assert(bud->lnum != cur->lnum);
		link = bud->lnum < cur->lnum ? &parent->rb_left :
					       &parent->rb_right;
	}
	rb_link_node(&bud->rb, parent, link);
	rb_insert_color(&bud->rb, &c->buds);

	if (!c->jheads) {
		ubifs_assert(c->replaying && c->ro_mount);
	} else {
		struct ubifs_jhead *jhead = &c->jheads[bud->jhead];

		list_add_tail(&bud->list, &jhead->buds_list);
	}

	/*
	 * Note, although this is a new bud, we anyway account this space now,
	 * before any data has been written to it, because this is about to
	 * guarantee fixed mount time, and this bud will anyway be read and
	 * scanned.
	 */
	c->bud_bytes += c->leb_size - bud->start;

	dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum,
		bud->start, dbg_jhead(bud->jhead), c->bud_bytes);

	spin_unlock(&c->buds_lock);
}
/**
* ubifs_add_bud_to_log - add a new bud to the log.
* @c: UBIFS file-system description object
* @jhead: journal head the bud belongs to
* @lnum: LEB number of the bud
* @offs: starting offset of the bud
*
* This function writes a reference node for the new bud LEB @lnum to the log,
* and adds the bud to the buds tree. It also makes sure that the amount of bud
* space does not exceed the 'c->max_bud_bytes' limit. Returns zero in case of
* success, %-EAGAIN if commit is required, %-EROFS if 'c->ro_error' is set,
* %-ENOMEM if memory allocation failed, and a negative error code in case of
* other failures.
*/
int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
{
int err;
struct ubifs_bud *bud;
struct ubifs_ref_node *ref;
/* Allocate both objects up-front, before 'c->log_mutex' is taken */
bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS);
if (!bud)
return -ENOMEM;
ref = kzalloc(c->ref_node_alsz, GFP_NOFS);
if (!ref) {
kfree(bud);
return -ENOMEM;
}
mutex_lock(&c->log_mutex);
ubifs_assert(!c->ro_media && !c->ro_mount);
if (c->ro_error) {
err = -EROFS;
goto out_unlock;
}
/* Make sure we have enough space in the log */
if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) {
dbg_log("not enough log space - %lld, required %d",
empty_log_bytes(c), c->min_log_bytes);
ubifs_commit_required(c);
err = -EAGAIN;
goto out_unlock;
}
/*
* Make sure the amount of space in buds will not exceed the
* 'c->max_bud_bytes' limit, because we want to guarantee mount time
* limits.
*
* It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
* because we are holding @c->log_mutex. All @c->bud_bytes changes take
* place when both @c->log_mutex and @c->buds_lock are held.
*/
if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) {
dbg_log("bud bytes %lld (%lld max), require commit",
c->bud_bytes, c->max_bud_bytes);
ubifs_commit_required(c);
err = -EAGAIN;
goto out_unlock;
}
/*
* If the journal is full enough - start background commit. Note, it is
* OK to read 'c->cmt_state' without spinlock because integer reads
* are atomic in the kernel.
*/
if (c->bud_bytes >= c->bg_bud_bytes &&
c->cmt_state == COMMIT_RESTING) {
dbg_log("bud bytes %lld (%lld max), initiate BG commit",
c->bud_bytes, c->max_bud_bytes);
ubifs_request_bg_commit(c);
}
bud->lnum = lnum;
bud->start = offs;
bud->jhead = jhead;
ref->ch.node_type = UBIFS_REF_NODE;
ref->lnum = cpu_to_le32(bud->lnum);
ref->offs = cpu_to_le32(bud->start);
ref->jhead = cpu_to_le32(jhead);
/* Move to the next log LEB if the reference node does not fit here */
if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
ubifs_assert(c->lhead_lnum != c->ltail_lnum);
c->lhead_offs = 0;
}
if (c->lhead_offs == 0) {
/* Must ensure next log LEB has been unmapped */
err = ubifs_leb_unmap(c, c->lhead_lnum);
if (err)
goto out_unlock;
}
if (bud->start == 0) {
/*
* Before writing the LEB reference which refers an empty LEB
* to the log, we have to make sure it is mapped, because
* otherwise we'd risk to refer an LEB with garbage in case of
* an unclean reboot, because the target LEB might have been
* unmapped, but not yet physically erased.
*/
err = ubifs_leb_map(c, bud->lnum);
if (err)
goto out_unlock;
}
dbg_log("write ref LEB %d:%d",
c->lhead_lnum, c->lhead_offs);
err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
c->lhead_offs);
if (err)
goto out_unlock;
/* The log head advances by the aligned size of the reference node */
c->lhead_offs += c->ref_node_alsz;
ubifs_add_bud(c, bud);
mutex_unlock(&c->log_mutex);
/* Only the reference node is freed on success - the bud is in the tree now */
kfree(ref);
return 0;
out_unlock:
mutex_unlock(&c->log_mutex);
kfree(ref);
kfree(bud);
return err;
}
/**
* remove_buds - remove used buds.
* @c: UBIFS file-system description object
*
* This function removes used buds from the buds tree. It does not remove the
* buds which are pointed to by journal heads. While walking the tree it also
* accumulates in 'c->cmt_bud_bytes' the bud space which this pass accounts
* for. The whole walk is done under 'c->buds_lock'.
*/
static void remove_buds(struct ubifs_info *c)
{
struct rb_node *p;
ubifs_assert(list_empty(&c->old_buds));
c->cmt_bud_bytes = 0;
spin_lock(&c->buds_lock);
p = rb_first(&c->buds);
while (p) {
struct rb_node *p1 = p;
struct ubifs_bud *bud;
struct ubifs_wbuf *wbuf;
/* Advance first, because the current node may be erased below */
p = rb_next(p);
bud = rb_entry(p1, struct ubifs_bud, rb);
wbuf = &c->jheads[bud->jhead].wbuf;
if (wbuf->lnum == bud->lnum) {
/*
* Do not remove buds which are pointed to by journal
* heads (non-closed buds).
*/
c->cmt_bud_bytes += wbuf->offs - bud->start;
dbg_log("preserve %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
bud->lnum, bud->start, dbg_jhead(bud->jhead),
wbuf->offs - bud->start, c->cmt_bud_bytes);
/* The preserved bud now starts at the write-buffer offset */
bud->start = wbuf->offs;
} else {
c->cmt_bud_bytes += c->leb_size - bud->start;
dbg_log("remove %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
bud->lnum, bud->start, dbg_jhead(bud->jhead),
c->leb_size - bud->start, c->cmt_bud_bytes);
rb_erase(p1, &c->buds);
/*
* If the commit does not finish, the recovery will need
* to replay the journal, in which case the old buds
* must be unchanged. Do not release them until post
* commit i.e. do not allow them to be garbage
* collected.
*/
list_move(&bud->list, &c->old_buds);
}
}
spin_unlock(&c->buds_lock);
}
/**
 * ubifs_log_start_commit - start commit.
 * @c: UBIFS file-system description object
 * @ltail_lnum: return new log tail LEB number
 *
 * The commit operation starts with writing "commit start" node to the log and
 * reference nodes for all journal heads which will define new journal after
 * the commit has been finished. The commit start and reference nodes are
 * written in one go to the nearest empty log LEB (hence, when commit is
 * finished UBIFS may safely unmap all the previous log LEBs). This function
 * returns zero in case of success and a negative error code in case of
 * failure (%-ENOMEM if the node buffer cannot be allocated).
 */
int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
{
	void *buf;
	struct ubifs_cs_node *cs;
	struct ubifs_ref_node *ref;
	int err, i, max_len, len;

	err = dbg_check_bud_bytes(c);
	if (err)
		return err;

	/* Room for the commit start node plus one reference node per head */
	max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
	max_len = ALIGN(max_len, c->min_io_size);
	/*
	 * The buffer is zero-allocated so that any node bytes which are not
	 * explicitly set below do not carry stale heap contents to the flash.
	 * 'ubifs_add_bud_to_log()' zero-allocates its reference node as well.
	 */
	buf = cs = kzalloc(max_len, GFP_NOFS);
	if (!buf)
		return -ENOMEM;

	cs->ch.node_type = UBIFS_CS_NODE;
	cs->cmt_no = cpu_to_le64(c->cmt_no);
	ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);

	/*
	 * Note, we do not lock 'c->log_mutex' because this is the commit start
	 * phase and we are exclusively using the log. And we do not lock
	 * write-buffer because nobody can write to the file-system at this
	 * phase.
	 */

	len = UBIFS_CS_NODE_SZ;
	for (i = 0; i < c->jhead_cnt; i++) {
		int lnum = c->jheads[i].wbuf.lnum;
		int offs = c->jheads[i].wbuf.offs;

		/* Skip heads which have no LEB or whose LEB is full */
		if (lnum == -1 || offs == c->leb_size)
			continue;

		dbg_log("add ref to LEB %d:%d for jhead %s",
			lnum, offs, dbg_jhead(i));
		ref = buf + len;
		ref->ch.node_type = UBIFS_REF_NODE;
		ref->lnum = cpu_to_le32(lnum);
		ref->offs = cpu_to_le32(offs);
		ref->jhead = cpu_to_le32(i);

		ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
		len += UBIFS_REF_NODE_SZ;
	}

	ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);

	/* Switch to the next log LEB */
	if (c->lhead_offs) {
		c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
		ubifs_assert(c->lhead_lnum != c->ltail_lnum);
		c->lhead_offs = 0;
	}

	/* Must ensure next LEB has been unmapped */
	err = ubifs_leb_unmap(c, c->lhead_lnum);
	if (err)
		goto out;

	len = ALIGN(len, c->min_io_size);
	dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
	err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len);
	if (err)
		goto out;

	/* The LEB which holds the commit start node becomes the new log tail */
	*ltail_lnum = c->lhead_lnum;

	c->lhead_offs += len;
	if (c->lhead_offs == c->leb_size) {
		c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
		c->lhead_offs = 0;
	}

	remove_buds(c);

	/*
	 * We have started the commit and now users may use the rest of the log
	 * for new writes.
	 */
	c->min_log_bytes = 0;

out:
	kfree(buf);
	return err;
}
/**
 * ubifs_log_end_commit - end commit.
 * @c: UBIFS file-system description object
 * @ltail_lnum: new log tail LEB number
 *
 * This function is called when the commit operation has finished. It moves the
 * log tail to the new position, subtracts the committed bud space from
 * 'c->bud_bytes' and writes the master node so that it stores the new log tail
 * LEB number. Returns zero in case of success and a negative error code in
 * case of failure.
 */
int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
{
	int ret;

	/*
	 * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS
	 * writes during commit. It is only the short "commit start" phase when
	 * writers are blocked.
	 */
	mutex_lock(&c->log_mutex);

	dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
		c->ltail_lnum, ltail_lnum);
	c->ltail_lnum = ltail_lnum;

	/*
	 * The commit is finished and from now on it must be guaranteed that
	 * there is always enough space for the next commit.
	 */
	c->min_log_bytes = c->leb_size;

	spin_lock(&c->buds_lock);
	c->bud_bytes -= c->cmt_bud_bytes;
	spin_unlock(&c->buds_lock);

	/* The master node is written only if the bud bytes check passes */
	ret = dbg_check_bud_bytes(c);
	if (!ret)
		ret = ubifs_write_master(c);

	mutex_unlock(&c->log_mutex);
	return ret;
}
/**
 * ubifs_log_post_commit - things to do after commit is completed.
 * @c: UBIFS file-system description object
 * @old_ltail_lnum: old log tail LEB number
 *
 * Release buds only after commit is completed, because they must be unchanged
 * if recovery is needed.
 *
 * Unmap log LEBs only after commit is completed, because they may be needed for
 * recovery.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
{
	int lnum, err = 0;

	/* Hand back the LEBs of all old buds and free the bud objects */
	while (!list_empty(&c->old_buds)) {
		struct ubifs_bud *bud = list_entry(c->old_buds.next,
						   struct ubifs_bud, list);

		err = ubifs_return_leb(c, bud->lnum);
		if (err)
			return err;
		list_del(&bud->list);
		kfree(bud);
	}

	/* Unmap the log LEBs from the old tail up to the current tail */
	mutex_lock(&c->log_mutex);
	lnum = old_ltail_lnum;
	while (lnum != c->ltail_lnum) {
		dbg_log("unmap log LEB %d", lnum);
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			break;
		lnum = ubifs_next_log_lnum(c, lnum);
	}
	mutex_unlock(&c->log_mutex);

	return err;
}
/**
* struct done_ref - references that have been done.
* @rb: rb-tree node which links this object into the "done" tree
* @lnum: LEB number of the reference; this is the key the tree is ordered by
*
* Objects of this type are allocated by 'done_already()' and released by
* 'destroy_done_tree()'.
*/
struct done_ref {
struct rb_node rb;
int lnum;
};
/**
 * done_already - check whether a reference has been done, recording it if not.
 * @done_tree: rb-tree to store references that have been done
 * @lnum: LEB number of reference
 *
 * If @lnum is not in @done_tree yet, a new entry is allocated and inserted for
 * it. This function returns %1 if the reference has been done, %0 if not (it
 * is recorded now), and %-ENOMEM if the new entry could not be allocated.
 */
static int done_already(struct rb_root *done_tree, int lnum)
{
	struct rb_node **link = &done_tree->rb_node, *parent = NULL;
	struct done_ref *entry;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct done_ref, rb);
		if (lnum == entry->lnum)
			return 1;
		link = lnum < entry->lnum ? &parent->rb_left :
					    &parent->rb_right;
	}

	/* Not seen before - remember it at the position the walk ended at */
	entry = kzalloc(sizeof(*entry), GFP_NOFS);
	if (!entry)
		return -ENOMEM;

	entry->lnum = lnum;
	rb_link_node(&entry->rb, parent, link);
	rb_insert_color(&entry->rb, done_tree);

	return 0;
}
/**
* destroy_done_tree - destroy the done tree.
* @done_tree: done tree to destroy
*/
static void destroy_done_tree(struct rb_root *done_tree)
{
struct done_ref *dr, *n;
rbtree_postorder_for_each_entry_safe(dr, n, done_tree, rb)
kfree(dr);
}
/**
* add_node - add a node to the consolidated log.
* @c: UBIFS file-system description object
* @buf: buffer to which to add
* @lnum: LEB number to which to write is passed and returned here
* @offs: offset to where to write is passed and returned here
* @node: node to add
*
* This function returns %0 on success and a negative error code on failure.
*/
static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
void *node)
{
struct ubifs_ch *ch = node;
int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs;
if (len > remains) {
int sz = ALIGN(*offs, c->min_io_size), err;
ubifs_pad(c, buf + *offs, sz - *offs);
err = ubifs_leb_change(c, *lnum, buf, sz);
if (err)
return err;
*lnum = ubifs_next_log_lnum(c, *lnum);
*offs = 0;
}
memcpy(buf + *offs, node, len);
*offs += ALIGN(len, 8);
return 0;
}
/**
* ubifs_consolidate_log - consolidate the log.
* @c: UBIFS file-system description object
*
* Repeated failed commits could cause the log to be full, but at least 1 LEB is
* needed for commit. This function rewrites the reference nodes in the log
* omitting duplicates, and failed CS nodes, and leaving no gaps.
*
* The log is scanned LEB by LEB from 'c->ltail_lnum' to 'c->lhead_lnum'
* (inclusive). The nodes to keep are gathered in a temporary buffer of
* 'c->leb_size' bytes and written back starting at the tail LEB.
*
* This function returns %0 on success and a negative error code on failure
* (%-ENOMEM if the temporary buffer or a tracking entry cannot be allocated,
* %-EINVAL if the consolidated log still ends in the old head LEB).
*/
int ubifs_consolidate_log(struct ubifs_info *c)
{
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
/* LEB numbers for which a reference node has already been kept */
struct rb_root done_tree = RB_ROOT;
int lnum, err, first = 1, write_lnum, offs = 0;
void *buf;
dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum,
c->lhead_lnum);
buf = vmalloc(c->leb_size);
if (!buf)
return -ENOMEM;
lnum = c->ltail_lnum;
write_lnum = lnum;
while (1) {
sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
if (IS_ERR(sleb)) {
err = PTR_ERR(sleb);
goto out_free;
}
list_for_each_entry(snod, &sleb->nodes, list) {
switch (snod->type) {
case UBIFS_REF_NODE: {
struct ubifs_ref_node *ref = snod->node;
int ref_lnum = le32_to_cpu(ref->lnum);
/* Keep only the first reference to each bud LEB */
err = done_already(&done_tree, ref_lnum);
if (err < 0)
goto out_scan;
if (err != 1) {
err = add_node(c, buf, &write_lnum,
&offs, snod->node);
if (err)
goto out_scan;
}
break;
}
case UBIFS_CS_NODE:
/* Only the first commit start node found is kept */
if (!first)
break;
err = add_node(c, buf, &write_lnum, &offs,
snod->node);
if (err)
goto out_scan;
first = 0;
break;
}
}
ubifs_scan_destroy(sleb);
if (lnum == c->lhead_lnum)
break;
lnum = ubifs_next_log_lnum(c, lnum);
}
/* Write out whatever is still gathered in the buffer */
if (offs) {
int sz = ALIGN(offs, c->min_io_size);
ubifs_pad(c, buf + offs, sz - offs);
err = ubifs_leb_change(c, write_lnum, buf, sz);
if (err)
goto out_free;
offs = ALIGN(offs, c->min_io_size);
}
destroy_done_tree(&done_tree);
vfree(buf);
/* Consolidation did not free the old head LEB */
if (write_lnum == c->lhead_lnum) {
ubifs_err("log is too full");
return -EINVAL;
}
/* Unmap remaining LEBs */
lnum = write_lnum;
do {
lnum = ubifs_next_log_lnum(c, lnum);
err = ubifs_leb_unmap(c, lnum);
if (err)
return err;
} while (lnum != c->lhead_lnum);
/* The log head moves to the end of the consolidated data */
c->lhead_lnum = write_lnum;
c->lhead_offs = offs;
dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs);
return 0;
out_scan:
ubifs_scan_destroy(sleb);
out_free:
destroy_done_tree(&done_tree);
vfree(buf);
return err;
}
/**
* dbg_check_bud_bytes - make sure bud bytes calculation are all right.
* @c: UBIFS file-system description object
*
* This function makes sure the amount of flash space used by closed buds
* ('c->bud_bytes' is correct). Returns zero in case of success and %-EINVAL in
* case of failure.
*/
static int dbg_check_bud_bytes(struct ubifs_info *c)
{
int i, err = 0;
struct ubifs_bud *bud;
long long bud_bytes = 0;
if (!dbg_is_chk_gen(c))
return 0;
spin_lock(&c->buds_lock);
for (i = 0; i < c->jhead_cnt; i++)
list_for_each_entry(bud, &c->jheads[i].buds_list, list)
bud_bytes += c->leb_size - bud->start;
if (c->bud_bytes != bud_bytes) {
ubifs_err("bad bud_bytes %lld, calculated %lld",
c->bud_bytes, bud_bytes);
err = -EINVAL;
}
spin_unlock(&c->buds_lock);
return err;
}

1321
fs/ubifs/lprops.c Normal file

File diff suppressed because it is too large Load diff

2277
fs/ubifs/lpt.c Normal file

File diff suppressed because it is too large Load diff

2037
fs/ubifs/lpt_commit.c Normal file

File diff suppressed because it is too large Load diff

395
fs/ubifs/master.c Normal file
View file

@ -0,0 +1,395 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/* This file implements reading and writing the master node */
#include "ubifs.h"
/**
 * scan_for_master - search for the valid master node.
 * @c: UBIFS file-system description object
 *
 * This function scans both master LEBs and looks at the last node of each of
 * them. The last node of the first LEB is copied to 'c->mst_node'. The two
 * LEBs are considered consistent when they contain the same number of nodes
 * and their last nodes sit at the same offset and match except for the common
 * header. Returns zero in case of success, %-EUCLEAN if the master area is
 * corrupted and requires recovery, and a negative error code in case of
 * failure.
 */
static int scan_for_master(struct ubifs_info *c)
{
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *last;
	int lnum = UBIFS_MST_LNUM, first_offs = 0, first_cnt;

	/* First master LEB */
	sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
	if (IS_ERR(sleb))
		return PTR_ERR(sleb);

	first_cnt = sleb->nodes_cnt;
	if (first_cnt > 0) {
		last = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
				  list);
		if (last->type != UBIFS_MST_NODE)
			goto out_dump;
		memcpy(c->mst_node, last->node, last->len);
		first_offs = last->offs;
	}
	ubifs_scan_destroy(sleb);

	/* Second master LEB */
	lnum++;
	sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
	if (IS_ERR(sleb))
		return PTR_ERR(sleb);

	if (sleb->nodes_cnt != first_cnt || !sleb->nodes_cnt)
		goto out;

	last = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
	if (last->type != UBIFS_MST_NODE)
		goto out_dump;
	if (last->offs != first_offs)
		goto out;
	/* The common headers are excluded from the comparison */
	if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
		   (void *)last->node + UBIFS_CH_SZ,
		   UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
		goto out;

	c->mst_offs = first_offs;
	ubifs_scan_destroy(sleb);
	return 0;

out:
	ubifs_scan_destroy(sleb);
	return -EUCLEAN;

out_dump:
	ubifs_err("unexpected node type %d master LEB %d:%d",
		  last->type, lnum, last->offs);
	ubifs_scan_destroy(sleb);
	return -EINVAL;
}
/**
 * validate_master - validate master node.
 * @c: UBIFS file-system description object
 *
 * This function sanity-checks the values which were read from the master
 * node. Every check has its own number, which is printed together with a dump
 * of the master node when the check fails. Returns zero if the data is all
 * right and %-EINVAL if not.
 */
static int validate_master(const struct ubifs_info *c)
{
	long long main_sz;
	int err;

	/* 'err' always holds the number of the check about to be done */
	err = 1;
	if (c->max_sqnum >= SQNUM_WATERMARK)
		goto out;

	err = 2;
	if (c->cmt_no >= c->max_sqnum)
		goto out;

	err = 3;
	if (c->highest_inum >= INUM_WATERMARK)
		goto out;

	err = 4;
	if (c->lhead_lnum < UBIFS_LOG_LNUM ||
	    c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs ||
	    c->lhead_offs < 0 || c->lhead_offs >= c->leb_size ||
	    c->lhead_offs & (c->min_io_size - 1))
		goto out;

	err = 5;
	if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first ||
	    c->zroot.offs >= c->leb_size || c->zroot.offs & 7)
		goto out;

	err = 6;
	if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len ||
	    c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len)
		goto out;

	err = 7;
	if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first)
		goto out;

	err = 8;
	if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first ||
	    c->ihead_offs % c->min_io_size || c->ihead_offs < 0 ||
	    c->ihead_offs > c->leb_size || c->ihead_offs & 7)
		goto out;

	main_sz = (long long)c->main_lebs * c->leb_size;

	err = 9;
	if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz)
		goto out;

	err = 10;
	if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last ||
	    c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size)
		goto out;

	err = 11;
	if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last ||
	    c->nhead_offs < 0 || c->nhead_offs % c->min_io_size ||
	    c->nhead_offs > c->leb_size)
		goto out;

	err = 12;
	if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last ||
	    c->ltab_offs < 0 ||
	    c->ltab_offs + c->ltab_sz > c->leb_size)
		goto out;

	err = 13;
	if (c->big_lpt && (c->lsave_lnum < c->lpt_first ||
	    c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 ||
	    c->lsave_offs + c->lsave_sz > c->leb_size))
		goto out;

	err = 14;
	if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt)
		goto out;

	err = 15;
	if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2)
		goto out;

	err = 16;
	if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1)
		goto out;

	err = 17;
	if (c->lst.total_free < 0 || c->lst.total_free > main_sz ||
	    c->lst.total_free & 7)
		goto out;

	err = 18;
	if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7))
		goto out;

	err = 19;
	if (c->lst.total_used < 0 || (c->lst.total_used & 7))
		goto out;

	err = 20;
	if (c->lst.total_free + c->lst.total_dirty +
	    c->lst.total_used > main_sz)
		goto out;

	err = 21;
	if (c->lst.total_dead + c->lst.total_dark +
	    c->lst.total_used + c->bi.old_idx_sz > main_sz)
		goto out;

	err = 22;
	if (c->lst.total_dead < 0 ||
	    c->lst.total_dead > c->lst.total_free + c->lst.total_dirty ||
	    c->lst.total_dead & 7)
		goto out;

	err = 23;
	if (c->lst.total_dark < 0 ||
	    c->lst.total_dark > c->lst.total_free + c->lst.total_dirty ||
	    c->lst.total_dark & 7)
		goto out;

	return 0;

out:
	ubifs_err("bad master node at offset %d error %d", c->mst_offs, err);
	ubifs_dump_node(c, c->mst_node);
	return -EINVAL;
}
/**
* ubifs_read_master - read master node.
* @c: UBIFS file-system description object
*
* This function finds and reads the master node during file-system mount. If
* the flash is empty, it creates default master node as well. Returns zero in
* case of success and a negative error code in case of failure.
*/
int ubifs_read_master(struct ubifs_info *c)
{
int err, old_leb_cnt;
c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL);
if (!c->mst_node)
return -ENOMEM;
err = scan_for_master(c);
if (err) {
if (err == -EUCLEAN)
err = ubifs_recover_master_node(c);
if (err)
/*
* Note, we do not free 'c->mst_node' here because the
* unmount routine will take care of this.
*/
return err;
}
/* Make sure that the recovery flag is clear */
c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY);
c->max_sqnum = le64_to_cpu(c->mst_node->ch.sqnum);
c->highest_inum = le64_to_cpu(c->mst_node->highest_inum);
c->cmt_no = le64_to_cpu(c->mst_node->cmt_no);
c->zroot.lnum = le32_to_cpu(c->mst_node->root_lnum);
c->zroot.offs = le32_to_cpu(c->mst_node->root_offs);
c->zroot.len = le32_to_cpu(c->mst_node->root_len);
c->lhead_lnum = le32_to_cpu(c->mst_node->log_lnum);
c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size);
c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
c->nhead_offs = le32_to_cpu(c->mst_node->nhead_offs);
c->ltab_lnum = le32_to_cpu(c->mst_node->ltab_lnum);
c->ltab_offs = le32_to_cpu(c->mst_node->ltab_offs);
c->lsave_lnum = le32_to_cpu(c->mst_node->lsave_lnum);
c->lsave_offs = le32_to_cpu(c->mst_node->lsave_offs);
c->lscan_lnum = le32_to_cpu(c->mst_node->lscan_lnum);
c->lst.empty_lebs = le32_to_cpu(c->mst_node->empty_lebs);
c->lst.idx_lebs = le32_to_cpu(c->mst_node->idx_lebs);
old_leb_cnt = le32_to_cpu(c->mst_node->leb_cnt);
c->lst.total_free = le64_to_cpu(c->mst_node->total_free);
c->lst.total_dirty = le64_to_cpu(c->mst_node->total_dirty);
c->lst.total_used = le64_to_cpu(c->mst_node->total_used);
c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
c->calc_idx_sz = c->bi.old_idx_sz;
if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
c->no_orphs = 1;
if (old_leb_cnt != c->leb_cnt) {
/* The file system has been resized */
int growth = c->leb_cnt - old_leb_cnt;
if (c->leb_cnt < old_leb_cnt ||
c->leb_cnt < UBIFS_MIN_LEB_CNT) {
ubifs_err("bad leb_cnt on master node");
ubifs_dump_node(c, c->mst_node);
return -EINVAL;
}
dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs",
old_leb_cnt, c->leb_cnt);
c->lst.empty_lebs += growth;
c->lst.total_free += growth * (long long)c->leb_size;
c->lst.total_dark += growth * (long long)c->dark_wm;
/*
* Reflect changes back onto the master node. N.B. the master
* node gets written immediately whenever mounting (or
* remounting) in read-write mode, so we do not need to write it
* here.
*/
c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt);
c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs);
c->mst_node->total_free = cpu_to_le64(c->lst.total_free);
c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark);
}
err = validate_master(c);
if (err)
return err;
err = dbg_old_index_check_init(c, &c->zroot);
return err;
}
/**
 * ubifs_write_master - write master node.
 * @c: UBIFS file-system description object
 *
 * This function writes the master node to both master LEBs at the same
 * offset, so that there are two copies to enable recovery. Returns zero in
 * case of success and a negative error code in case of failure.
 */
int ubifs_write_master(struct ubifs_info *c)
{
	const int len = UBIFS_MST_NODE_SZ;
	int lnum = UBIFS_MST_LNUM;
	int err, offs;

	ubifs_assert(!c->ro_media && !c->ro_mount);
	if (c->ro_error)
		return -EROFS;

	/* The new master node goes right after the previous one */
	offs = c->mst_offs + c->mst_node_alsz;
	if (offs + UBIFS_MST_NODE_SZ > c->leb_size) {
		/* No room left in the LEB - start over from its beginning */
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			return err;
		offs = 0;
	}

	c->mst_offs = offs;
	c->mst_node->highest_inum = cpu_to_le64(c->highest_inum);

	/* First copy */
	err = ubifs_write_node(c, c->mst_node, len, lnum, offs);
	if (err)
		return err;

	/* Second copy, in the next LEB */
	lnum++;
	if (offs == 0) {
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			return err;
	}

	return ubifs_write_node(c, c->mst_node, len, lnum, offs);
}

303
fs/ubifs/misc.h Normal file
View file

@ -0,0 +1,303 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/*
* This file contains miscellaneous helper functions.
*/
#ifndef __UBIFS_MISC_H__
#define __UBIFS_MISC_H__
/**
 * ubifs_zn_dirty - check if znode is dirty.
 * @znode: znode to check
 *
 * Returns %1 if the %DIRTY_ZNODE bit is set in @znode->flags and %0
 * otherwise.
 */
static inline int ubifs_zn_dirty(const struct ubifs_znode *znode)
{
	return test_bit(DIRTY_ZNODE, &znode->flags) ? 1 : 0;
}
/**
 * ubifs_zn_obsolete - check if znode is obsolete.
 * @znode: znode to check
 *
 * Returns %1 if the %OBSOLETE_ZNODE bit is set in @znode->flags and %0
 * otherwise.
 */
static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode)
{
	return test_bit(OBSOLETE_ZNODE, &znode->flags) ? 1 : 0;
}
/**
 * ubifs_zn_cow - check if znode has to be copied on write.
 * @znode: znode to check
 *
 * Returns %1 if the %COW_ZNODE bit is set in @znode->flags and %0 otherwise.
 */
static inline int ubifs_zn_cow(const struct ubifs_znode *znode)
{
	return test_bit(COW_ZNODE, &znode->flags) ? 1 : 0;
}
/**
 * ubifs_wake_up_bgt - wake up background thread.
 * @c: UBIFS file-system description object
 *
 * Nothing is done if there is no background thread or if @c->need_bgt is
 * already set.
 */
static inline void ubifs_wake_up_bgt(struct ubifs_info *c)
{
	if (!c->bgt || c->need_bgt)
		return;

	c->need_bgt = 1;
	wake_up_process(c->bgt);
}
/**
 * ubifs_tnc_find_child - find next child in znode.
 * @znode: znode to search at
 * @start: the zbranch index to start at
 *
 * This helper walks the zbranches of @znode from index @start upwards and
 * returns the first child znode which is present, or %NULL if there is none.
 */
static inline struct ubifs_znode *
ubifs_tnc_find_child(struct ubifs_znode *znode, int start)
{
	int i;

	for (i = start; i < znode->child_cnt; i++)
		if (znode->zbranch[i].znode)
			return znode->zbranch[i].znode;

	return NULL;
}
/**
 * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object.
 * @inode: the VFS 'struct inode' pointer
 *
 * Returns the 'struct ubifs_inode' object which embeds @inode as its
 * 'vfs_inode' member.
 */
static inline struct ubifs_inode *ubifs_inode(const struct inode *inode)
{
	return container_of(inode,
			    struct ubifs_inode,
			    vfs_inode);
}
/**
 * ubifs_compr_present - check if compressor was compiled in.
 * @compr_type: compressor type to check
 *
 * This function returns %1 if the compressor of type @compr_type is present
 * (i.e. its 'capi_name' is set), and %0 if not.
 */
static inline int ubifs_compr_present(int compr_type)
{
	ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);

	if (ubifs_compressors[compr_type]->capi_name)
		return 1;
	return 0;
}
/**
 * ubifs_compr_name - get compressor name string by its type.
 * @compr_type: compressor type
 *
 * This function returns the 'name' string of the compressor descriptor for
 * @compr_type.
 */
static inline const char *ubifs_compr_name(int compr_type)
{
	ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);

	return ubifs_compressors[compr_type]->name;
}
/**
 * ubifs_wbuf_sync - synchronize write-buffer.
 * @wbuf: write-buffer to synchronize
 *
 * This is the same as 'ubifs_wbuf_sync_nolock()' but it does not assume that
 * the write-buffer is already locked: @wbuf->io_mutex is taken around the
 * call. Returns whatever 'ubifs_wbuf_sync_nolock()' returns.
 */
static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf)
{
	int ret;

	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
	ret = ubifs_wbuf_sync_nolock(wbuf);
	mutex_unlock(&wbuf->io_mutex);

	return ret;
}
/**
 * ubifs_encode_dev - encode device node IDs.
 * @dev: UBIFS device node information
 * @rdev: device IDs to encode
 *
 * This is a helper function which encodes major/minor numbers of a device node
 * into UBIFS device node description. We use standard Linux "new" and "huge"
 * encodings. Returns the size of the member of @dev which was filled in.
 */
static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev)
{
	if (!new_valid_dev(rdev)) {
		/* Does not fit the "new" encoding - use the "huge" one */
		dev->huge = cpu_to_le64(huge_encode_dev(rdev));
		return sizeof(dev->huge);
	}

	dev->new = cpu_to_le32(new_encode_dev(rdev));
	return sizeof(dev->new);
}
/**
 * ubifs_add_dirt - add dirty space to LEB properties.
 * @c: the UBIFS file-system description object
 * @lnum: LEB to add dirty space for
 * @dirty: dirty space to add
 *
 * This is a helper function which increases the amount of dirty space of LEB
 * @lnum by means of 'ubifs_update_one_lp()'. Returns zero in case of success
 * and a negative error code in case of failure.
 */
static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty)
{
	int err;

	err = ubifs_update_one_lp(c, lnum, LPROPS_NC, dirty, 0, 0);
	return err;
}
/**
 * ubifs_return_leb - return LEB to lprops.
 * @c: the UBIFS file-system description object
 * @lnum: LEB to return
 *
 * This helper function cleans the "taken" flag (%LPROPS_TAKEN) of a logical
 * eraseblock in the lprops. Returns zero in case of success and a negative
 * error code in case of failure.
 */
static inline int ubifs_return_leb(struct ubifs_info *c, int lnum)
{
	int err;

	err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
				  LPROPS_TAKEN, 0);
	return err;
}
/**
 * ubifs_idx_node_sz - return index node size.
 * @c: the UBIFS file-system description object
 * @child_cnt: number of children of this index node
 *
 * The size is the index node header plus @child_cnt branches, each of which
 * is followed by a key of @c->key_len bytes.
 */
static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt)
{
	return (UBIFS_BRANCH_SZ + c->key_len) * child_cnt + UBIFS_IDX_NODE_SZ;
}
/**
 * ubifs_idx_branch - return pointer to an index branch.
 * @c: the UBIFS file-system description object
 * @idx: index node
 * @bnum: branch number
 *
 * Branches are laid out back to back starting at @idx->branches, each one
 * occupying %UBIFS_BRANCH_SZ plus @c->key_len bytes.
 */
static inline
struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c,
				      const struct ubifs_idx_node *idx,
				      int bnum)
{
	void *first = (void *)idx->branches;

	return (struct ubifs_branch *)(first +
				       (UBIFS_BRANCH_SZ + c->key_len) * bnum);
}
/**
* ubifs_idx_key - return pointer to an index key.
* @c: the UBIFS file-system description object
* @idx: index node
*/
static inline void *ubifs_idx_key(const struct ubifs_info *c,
const struct ubifs_idx_node *idx)
{
return (void *)((struct ubifs_branch *)idx->branches)->key;
}
/**
 * ubifs_current_time - round current time to time granularity.
 * @inode: inode
 *
 * If the time granularity of the super block of @inode is finer than one
 * second, 'current_fs_time()' is used, otherwise %CURRENT_TIME_SEC.
 */
static inline struct timespec ubifs_current_time(struct inode *inode)
{
	if (inode->i_sb->s_time_gran < NSEC_PER_SEC)
		return current_fs_time(inode->i_sb);

	return CURRENT_TIME_SEC;
}
/**
 * ubifs_tnc_lookup - look up a file-system node.
 * @c: UBIFS file-system description object
 * @key: node key to lookup
 * @node: the node is returned here
 *
 * This function looks up and reads the node with key @key. It is
 * 'ubifs_tnc_locate()' called with %NULL for both of its trailing arguments.
 * The caller has to make sure the @node buffer is large enough to fit the
 * node. Returns zero in case of success, %-ENOENT if the node was not found,
 * and a negative error code in case of failure.
 */
static inline int ubifs_tnc_lookup(struct ubifs_info *c,
				   const union ubifs_key *key, void *node)
{
	int err;

	err = ubifs_tnc_locate(c, key, node, NULL, NULL);
	return err;
}
/**
 * ubifs_get_lprops - get reference to LEB properties.
 * @c: the UBIFS file-system description object
 *
 * This function locks lprops by taking @c->lp_mutex. Lprops have to be
 * unlocked by 'ubifs_release_lprops()'.
 */
static inline void ubifs_get_lprops(struct ubifs_info *c)
{
	struct mutex *lock = &c->lp_mutex;

	mutex_lock(lock);
}
/**
 * ubifs_release_lprops - release lprops lock.
 * @c: the UBIFS file-system description object
 *
 * This function has to be called after each 'ubifs_get_lprops()' call to
 * unlock lprops. Before unlocking it asserts that the lock is really held and
 * that the count of empty LEBs is within [0, @c->main_lebs].
 */
static inline void ubifs_release_lprops(struct ubifs_info *c)
{
	ubifs_assert(mutex_is_locked(&c->lp_mutex));
	ubifs_assert(c->lst.empty_lebs >= 0 &&
		     c->lst.empty_lebs <= c->main_lebs);

	mutex_unlock(&c->lp_mutex);
}
/**
 * ubifs_next_log_lnum - switch to the next log LEB.
 * @c: UBIFS file-system description object
 * @lnum: current log LEB
 *
 * This helper function returns the log LEB number which goes next after LEB
 * @lnum, wrapping around to %UBIFS_LOG_LNUM once @c->log_last is passed.
 */
static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
{
	int next = lnum + 1;

	return next > c->log_last ? UBIFS_LOG_LNUM : next;
}
#endif /* __UBIFS_MISC_H__ */

956
fs/ubifs/orphan.c Normal file
View file

@ -0,0 +1,956 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Author: Adrian Hunter
*/
#include "ubifs.h"
/*
* An orphan is an inode number whose inode node has been committed to the index
* with a link count of zero. That happens when an open file is deleted
* (unlinked) and then a commit is run. In the normal course of events the inode
* would be deleted when the file is closed. However in the case of an unclean
* unmount, orphans need to be accounted for. After an unclean unmount, the
* orphans' inodes must be deleted which means either scanning the entire index
* looking for them, or keeping a list on flash somewhere. This unit implements
* the latter approach.
*
* The orphan area is a fixed number of LEBs situated between the LPT area and
* the main area. The number of orphan area LEBs is specified when the file
* system is created. The minimum number is 1. The size of the orphan area
* should be so that it can hold the maximum number of orphans that are expected
* to ever exist at one time.
*
* The number of orphans that can fit in a LEB is:
*
* (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)
*
* For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough.
*
* Orphans are accumulated in a rb-tree. When an inode's link count drops to
* zero, the inode number is added to the rb-tree. It is removed from the tree
* when the inode is deleted. Any new orphans that are in the orphan tree when
* the commit is run, are written to the orphan area in 1 or more orphan nodes.
* If the orphan area is full, it is consolidated to make space. There is
* always enough space because validation prevents the user from creating more
* than the maximum number of orphans allowed.
*/
static int dbg_check_orphans(struct ubifs_info *c);
/**
 * ubifs_add_orphan - add an orphan.
 * @c: UBIFS file-system description object
 * @inum: orphan inode number
 *
 * Add an orphan. This function is called when an inode's link count drops to
 * zero. Returns zero in case of success (including the case when @inum is
 * already in the orphan tree, which is only reported), %-ENOMEM if the orphan
 * object cannot be allocated and %-ENFILE if the limit on the number of
 * orphans has been reached.
 */
int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
{
	struct ubifs_orphan *orphan, *o;
	struct rb_node **p, *parent = NULL;

	/* Allocate before taking the spin-lock */
	orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS);
	if (!orphan)
		return -ENOMEM;
	orphan->inum = inum;
	orphan->new = 1;

	spin_lock(&c->orphan_lock);
	if (c->tot_orphans >= c->max_orphans) {
		spin_unlock(&c->orphan_lock);
		kfree(orphan);
		return -ENFILE;
	}

	/* Find the place in the rb-tree, which is ordered by inode number */
	for (p = &c->orph_tree.rb_node; *p; ) {
		parent = *p;
		o = rb_entry(parent, struct ubifs_orphan, rb);
		if (inum == o->inum) {
			ubifs_err("orphaned twice");
			spin_unlock(&c->orphan_lock);
			kfree(orphan);
			return 0;
		}
		p = inum < o->inum ? &parent->rb_left : &parent->rb_right;
	}

	c->tot_orphans += 1;
	c->new_orphans += 1;
	rb_link_node(&orphan->rb, parent, p);
	rb_insert_color(&orphan->rb, &c->orph_tree);
	list_add_tail(&orphan->list, &c->orph_list);
	list_add_tail(&orphan->new_list, &c->orph_new);
	spin_unlock(&c->orphan_lock);

	dbg_gen("ino %lu", (unsigned long)inum);
	return 0;
}
/**
 * ubifs_delete_orphan - delete an orphan.
 * @c: UBIFS file-system description object
 * @inum: orphan inode number
 *
 * Delete an orphan. This function is called when an inode is deleted. An
 * orphan which is currently being committed is only marked for deletion and
 * put on the deletion list; otherwise it is removed and freed straight away.
 */
void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
{
	struct ubifs_orphan *o = NULL;
	struct rb_node *p;

	spin_lock(&c->orphan_lock);

	/* Look the orphan up in the rb-tree */
	p = c->orph_tree.rb_node;
	while (p) {
		o = rb_entry(p, struct ubifs_orphan, rb);
		if (inum < o->inum)
			p = p->rb_left;
		else if (inum > o->inum)
			p = p->rb_right;
		else
			break;
	}

	if (!p) {
		spin_unlock(&c->orphan_lock);
		ubifs_err("missing orphan ino %lu", (unsigned long)inum);
		dump_stack();
		return;
	}

	if (o->del) {
		/* Already on the deletion list */
		spin_unlock(&c->orphan_lock);
		dbg_gen("deleted twice ino %lu",
			(unsigned long)inum);
		return;
	}

	if (o->cmt) {
		/* Being committed - defer the removal */
		o->del = 1;
		o->dnext = c->orph_dnext;
		c->orph_dnext = o;
		spin_unlock(&c->orphan_lock);
		dbg_gen("delete later ino %lu",
			(unsigned long)inum);
		return;
	}

	rb_erase(p, &c->orph_tree);
	list_del(&o->list);
	c->tot_orphans -= 1;
	if (o->new) {
		list_del(&o->new_list);
		c->new_orphans -= 1;
	}
	spin_unlock(&c->orphan_lock);

	kfree(o);
	dbg_gen("inum %lu", (unsigned long)inum);
}
/**
 * ubifs_orphan_start_commit - start commit of orphans.
 * @c: UBIFS file-system description object
 *
 * All new orphans are moved to the list of orphans to commit (the 'cnext'
 * list) and the list of new orphans is emptied. @c->no_orphs is set according
 * to whether there are any orphans at all. Always returns zero.
 */
int ubifs_orphan_start_commit(struct ubifs_info *c)
{
	struct ubifs_orphan *orphan;
	struct ubifs_orphan **tail = &c->orph_cnext;

	spin_lock(&c->orphan_lock);

	/* Chain the new orphans through their 'cnext' pointers */
	list_for_each_entry(orphan, &c->orph_new, new_list) {
		ubifs_assert(orphan->new);
		ubifs_assert(!orphan->cmt);
		orphan->new = 0;
		orphan->cmt = 1;
		*tail = orphan;
		tail = &orphan->cnext;
	}
	*tail = NULL;

	c->cmt_orphans = c->new_orphans;
	c->new_orphans = 0;
	dbg_cmt("%d orphans to commit", c->cmt_orphans);
	INIT_LIST_HEAD(&c->orph_new);
	c->no_orphs = (c->tot_orphans == 0);

	spin_unlock(&c->orphan_lock);
	return 0;
}
/**
 * avail_orphs - calculate available space.
 * @c: UBIFS file-system description object
 *
 * This function returns the number of orphans that can be written in the
 * available space: the orphan LEBs after the orphan head LEB plus whatever
 * is left in the orphan head LEB itself.
 */
static int avail_orphs(struct ubifs_info *c)
{
	int gap = c->leb_size - c->ohead_offs;
	int avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1;
	int avail;

	/* LEBs which follow the orphan head LEB */
	avail = avail_lebs *
		((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64));

	/* The rest of the head LEB counts only if a node with 1 orphan fits */
	if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64))
		avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64);

	return avail;
}
/**
 * tot_avail_orphs - calculate total space.
 * @c: UBIFS file-system description object
 *
 * This function returns the number of orphans that can be written in half
 * the total space. That leaves half the space for adding new orphans.
 */
static int tot_avail_orphs(struct ubifs_info *c)
{
	/* One orphan node per LEB, for all the orphan LEBs */
	int avail = c->orph_lebs *
		    ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64));

	return avail / 2;
}
/**
 * do_write_orph_node - write a node to the orphan head.
 * @c: UBIFS file-system description object
 * @len: length of node
 * @atomic: write atomically
 *
 * This function writes a node to the orphan head from the orphan buffer. If
 * @atomic is not zero, then the write is done atomically by means of
 * 'ubifs_leb_change()'. On success, %0 is returned, otherwise a negative
 * error code is returned.
 */
static int do_write_orph_node(struct ubifs_info *c, int len, int atomic)
{
	int err;

	if (atomic) {
		ubifs_assert(c->ohead_offs == 0);
		ubifs_prepare_node(c, c->orph_buf, len, 1);
		len = ALIGN(len, c->min_io_size);
		return ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len);
	}

	if (c->ohead_offs == 0) {
		/* Ensure LEB has been unmapped */
		err = ubifs_leb_unmap(c, c->ohead_lnum);
		if (err)
			return err;
	}

	return ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum,
				c->ohead_offs);
}
/**
 * write_orph_node - write an orphan node.
 * @c: UBIFS file-system description object
 * @atomic: write atomically
 *
 * This function builds an orphan node from the cnext list and writes it to the
 * orphan head. As many orphans as fit into the rest of the orphan head LEB
 * (but not more than are left to commit) are put into the node. On success,
 * %0 is returned, otherwise a negative error code is returned.
 */
static int write_orph_node(struct ubifs_info *c, int atomic)
{
struct ubifs_orphan *orphan, *cnext;
struct ubifs_orph_node *orph;
int gap, err, len, cnt, i;
ubifs_assert(c->cmt_orphans > 0);
/* 'gap' is the number of bytes left in the orphan head LEB */
gap = c->leb_size - c->ohead_offs;
if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) {
/* Not even a node with one inode number fits - go to the next LEB */
c->ohead_lnum += 1;
c->ohead_offs = 0;
gap = c->leb_size;
if (c->ohead_lnum > c->orph_last) {
/*
 * We limit the number of orphans so that this should
 * never happen.
 */
ubifs_err("out of space in orphan area");
return -EINVAL;
}
}
/* How many inode numbers fit, capped by how many are left to commit */
cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64);
if (cnt > c->cmt_orphans)
cnt = c->cmt_orphans;
len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64);
ubifs_assert(c->orph_buf);
orph = c->orph_buf;
orph->ch.node_type = UBIFS_ORPH_NODE;
spin_lock(&c->orphan_lock);
/*
 * Take the first 'cnt' orphans off the cnext list: store their inode
 * numbers in the node and clear their 'cmt' flag and 'cnext' link.
 */
cnext = c->orph_cnext;
for (i = 0; i < cnt; i++) {
orphan = cnext;
ubifs_assert(orphan->cmt);
orph->inos[i] = cpu_to_le64(orphan->inum);
orphan->cmt = 0;
cnext = orphan->cnext;
orphan->cnext = NULL;
}
c->orph_cnext = cnext;
c->cmt_orphans -= cnt;
spin_unlock(&c->orphan_lock);
if (c->cmt_orphans)
orph->cmt_no = cpu_to_le64(c->cmt_no);
else
/* Mark the last node of the commit */
orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63));
ubifs_assert(c->ohead_offs + len <= c->leb_size);
ubifs_assert(c->ohead_lnum >= c->orph_first);
ubifs_assert(c->ohead_lnum <= c->orph_last);
err = do_write_orph_node(c, len, atomic);
/* Note, the orphan head is advanced whether or not the write succeeded */
c->ohead_offs += ALIGN(len, c->min_io_size);
c->ohead_offs = ALIGN(c->ohead_offs, 8);
return err;
}
/**
 * write_orph_nodes - write orphan nodes until there are no more to commit.
 * @c: UBIFS file-system description object
 * @atomic: write atomically
 *
 * This function writes orphan nodes for all the orphans to commit. If @atomic
 * is not zero, the orphan LEBs after the orphan head LEB are un-mapped
 * afterwards. On success, %0 is returned, otherwise a negative error code is
 * returned.
 */
static int write_orph_nodes(struct ubifs_info *c, int atomic)
{
	int err, lnum;

	while (c->cmt_orphans > 0) {
		err = write_orph_node(c, atomic);
		if (err)
			return err;
	}

	if (!atomic)
		return 0;

	/* Unmap any unused LEBs after consolidation */
	for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) {
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			return err;
	}

	return 0;
}
/**
* consolidate - consolidate the orphan area.
* @c: UBIFS file-system description object
*
* This function enables consolidation by putting all the orphans into the list
* to commit. The list is in the order that the orphans were added, and the
* LEBs are written atomically in order, so at no time can orphans be lost by
* an unclean unmount.
*
* This function returns %0 on success and a negative error code on failure.
*/
static int consolidate(struct ubifs_info *c)
{
int tot_avail = tot_avail_orphs(c), err = 0;
spin_lock(&c->orphan_lock);
dbg_cmt("there is space for %d orphans and there are %d",
tot_avail, c->tot_orphans);
if (c->tot_orphans - c->new_orphans <= tot_avail) {
struct ubifs_orphan *orphan, **last;
int cnt = 0;
/* Change the cnext list to include all non-new orphans */
last = &c->orph_cnext;
list_for_each_entry(orphan, &c->orph_list, list) {
if (orphan->new)
continue;
orphan->cmt = 1;
*last = orphan;
last = &orphan->cnext;
cnt += 1;
}
*last = NULL;
ubifs_assert(cnt == c->tot_orphans - c->new_orphans);
c->cmt_orphans = cnt;
c->ohead_lnum = c->orph_first;
c->ohead_offs = 0;
} else {
/*
* We limit the number of orphans so that this should
* never happen.
*/
ubifs_err("out of space in orphan area");
err = -EINVAL;
}
spin_unlock(&c->orphan_lock);
return err;
}
/**
 * commit_orphans - commit orphans.
 * @c: UBIFS file-system description object
 *
 * This function commits orphans to flash. If the available space is not
 * enough for the orphans to commit, the orphan area is consolidated first and
 * the nodes are then written atomically. On success, %0 is returned,
 * otherwise a negative error code is returned.
 */
static int commit_orphans(struct ubifs_info *c)
{
	int err, atomic = 0;

	ubifs_assert(c->cmt_orphans > 0);

	if (avail_orphs(c) < c->cmt_orphans) {
		/* Not enough space to write new orphans, so consolidate */
		err = consolidate(c);
		if (err)
			return err;
		atomic = 1;
	}

	return write_orph_nodes(c, atomic);
}
/**
 * erase_deleted - erase the orphans marked for deletion.
 * @c: UBIFS file-system description object
 *
 * During commit, the orphans being committed cannot be deleted, so they are
 * marked for deletion and deleted by this function. Also, the recovery
 * adds killed orphans to the deletion list, and therefore they are deleted
 * here too.
 */
static void erase_deleted(struct ubifs_info *c)
{
	struct ubifs_orphan *orphan, *next;

	spin_lock(&c->orphan_lock);
	for (orphan = c->orph_dnext; orphan; orphan = next) {
		/* Fetch the link first - the orphan is freed below */
		next = orphan->dnext;

		ubifs_assert(!orphan->new);
		ubifs_assert(orphan->del);
		rb_erase(&orphan->rb, &c->orph_tree);
		list_del(&orphan->list);
		c->tot_orphans -= 1;
		dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum);
		kfree(orphan);
	}
	c->orph_dnext = NULL;
	spin_unlock(&c->orphan_lock);
}
/**
 * ubifs_orphan_end_commit - end commit of orphans.
 * @c: UBIFS file-system description object
 *
 * The orphans to commit, if any, are written out, then the orphans marked for
 * deletion are erased. Returns zero in case of success and a negative error
 * code in case of failure.
 */
int ubifs_orphan_end_commit(struct ubifs_info *c)
{
	if (c->cmt_orphans != 0) {
		int err = commit_orphans(c);

		if (err)
			return err;
	}

	erase_deleted(c);
	return dbg_check_orphans(c);
}
/**
 * ubifs_clear_orphans - erase all LEBs used for orphans.
 * @c: UBIFS file-system description object
 *
 * If recovery is not required, then the orphans from the previous session
 * are not needed. This function un-maps all the LEBs of the orphan area and
 * resets the orphan head to the beginning of the area. Returns zero in case
 * of success and a negative error code in case of failure.
 */
int ubifs_clear_orphans(struct ubifs_info *c)
{
	int lnum = c->orph_first;

	while (lnum <= c->orph_last) {
		int err = ubifs_leb_unmap(c, lnum);

		if (err)
			return err;
		lnum++;
	}

	c->ohead_lnum = c->orph_first;
	c->ohead_offs = 0;
	return 0;
}
/**
 * insert_dead_orphan - insert an orphan.
 * @c: UBIFS file-system description object
 * @inum: orphan inode number
 *
 * This function is a helper to the 'do_kill_orphans()' function. The orphan
 * must be kept until the next commit, so it is added to the rb-tree and the
 * deletion list. Returns zero in case of success (also if the orphan is
 * already in the tree) and %-ENOMEM if the orphan object cannot be allocated.
 */
static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
{
	struct ubifs_orphan *orphan, *o;
	struct rb_node **p, *parent = NULL;

	orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL);
	if (!orphan)
		return -ENOMEM;
	orphan->inum = inum;

	/* Find the place in the rb-tree, which is ordered by inode number */
	for (p = &c->orph_tree.rb_node; *p; ) {
		parent = *p;
		o = rb_entry(parent, struct ubifs_orphan, rb);
		if (inum == o->inum) {
			/* Already added - no problem */
			kfree(orphan);
			return 0;
		}
		p = inum < o->inum ? &parent->rb_left : &parent->rb_right;
	}

	c->tot_orphans += 1;
	rb_link_node(&orphan->rb, parent, p);
	rb_insert_color(&orphan->rb, &c->orph_tree);
	list_add_tail(&orphan->list, &c->orph_list);

	/* Put it at the front of the deletion list */
	orphan->del = 1;
	orphan->dnext = c->orph_dnext;
	c->orph_dnext = orphan;

	dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum,
		c->new_orphans, c->tot_orphans);
	return 0;
}
/**
 * do_kill_orphans - remove orphan inodes from the index.
 * @c: UBIFS file-system description object
 * @sleb: scanned LEB
 * @last_cmt_no: cmt_no of last orphan node read is passed and returned here
 * @outofdate: whether the LEB is out of date is returned here
 * @last_flagged: whether the end orphan node is encountered
 *
 * This function is a helper to the 'kill_orphans()' function. It goes through
 * every orphan node in a LEB and for every inode number recorded, removes
 * all keys for that inode from the TNC and records the inode as a dead orphan.
 * Returns zero in case of success (also when the LEB is found to be out of
 * date, in which case @outofdate is set) and a negative error code in case of
 * failure.
 */
static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
unsigned long long *last_cmt_no, int *outofdate,
int *last_flagged)
{
struct ubifs_scan_node *snod;
struct ubifs_orph_node *orph;
unsigned long long cmt_no;
ino_t inum;
int i, n, err, first = 1;
list_for_each_entry(snod, &sleb->nodes, list) {
/* Only orphan nodes are expected in this LEB */
if (snod->type != UBIFS_ORPH_NODE) {
ubifs_err("invalid node type %d in orphan area at %d:%d",
snod->type, sleb->lnum, snod->offs);
ubifs_dump_node(c, snod->node);
return -EINVAL;
}
orph = snod->node;
/* Check commit number */
/* The top bit is not part of the commit number - it is tested below */
cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX;
/*
 * The commit number on the master node may be less, because
 * of a failed commit. If there are several failed commits in a
 * row, the commit number written on orphan nodes will continue
 * to increase (because the commit number is adjusted here) even
 * though the commit number on the master node stays the same
 * because the master node has not been re-written.
 */
if (cmt_no > c->cmt_no)
c->cmt_no = cmt_no;
if (cmt_no < *last_cmt_no && *last_flagged) {
/*
 * The last orphan node had a higher commit number and
 * was flagged as the last written for that commit
 * number. That makes this orphan node, out of date.
 */
/* This is tolerated only for the first node of the LEB */
if (!first) {
ubifs_err("out of order commit number %llu in orphan node at %d:%d",
cmt_no, sleb->lnum, snod->offs);
ubifs_dump_node(c, snod->node);
return -EINVAL;
}
dbg_rcvry("out of date LEB %d", sleb->lnum);
*outofdate = 1;
return 0;
}
if (first)
first = 0;
/* Number of 8-byte inode numbers which follow the orphan node header */
n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
for (i = 0; i < n; i++) {
inum = le64_to_cpu(orph->inos[i]);
dbg_rcvry("deleting orphaned inode %lu",
(unsigned long)inum);
err = ubifs_tnc_remove_ino(c, inum);
if (err)
return err;
err = insert_dead_orphan(c, inum);
if (err)
return err;
}
/* Remember this node's commit number and flag for the next node */
*last_cmt_no = cmt_no;
if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) {
dbg_rcvry("last orph node for commit %llu at %d:%d",
cmt_no, sleb->lnum, snod->offs);
*last_flagged = 1;
} else
*last_flagged = 0;
}
return 0;
}
/**
 * kill_orphans - remove all orphan inodes from the index.
 * @c: UBIFS file-system description object
 *
 * If recovery is required, then orphan inodes recorded during the previous
 * session (which ended with an unclean unmount) must be deleted from the index.
 * This is done by updating the TNC, but since the index is not updated until
 * the next commit, the LEBs where the orphan information is recorded are not
 * erased until the next commit. Returns zero in case of success and a
 * negative error code in case of failure.
 */
static int kill_orphans(struct ubifs_info *c)
{
	unsigned long long last_cmt_no = 0;
	int outofdate = 0, last_flagged = 0;
	int lnum, err = 0;

	c->ohead_lnum = c->orph_first;
	c->ohead_offs = 0;

	/* Check no-orphans flag and skip this if no orphans */
	if (c->no_orphs) {
		dbg_rcvry("no orphans");
		return 0;
	}

	/*
	 * Orph nodes always start at c->orph_first and are written to each
	 * successive LEB in turn. Generally unused LEBs will have been unmapped
	 * but may contain out of date orphan nodes if the unmap didn't go
	 * through. In addition, the last orphan node written for each commit is
	 * marked (top bit of orph->cmt_no is set to 1). It is possible that
	 * there are orphan nodes from the next commit (i.e. the commit did not
	 * complete successfully). In that case, no orphans will have been lost
	 * due to the way that orphans are written, and any orphans added will
	 * be valid orphans anyway and so can be deleted.
	 */
	for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
		struct ubifs_scan_leb *sleb;

		dbg_rcvry("LEB %d", lnum);
		sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
		/* A LEB which needs cleaning gets a second chance */
		if (IS_ERR(sleb) && PTR_ERR(sleb) == -EUCLEAN)
			sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, -1);
		if (IS_ERR(sleb)) {
			err = PTR_ERR(sleb);
			break;
		}

		err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate,
				      &last_flagged);
		/* The orphan head follows the last good LEB which has data */
		if (!err && !outofdate && sleb->endpt) {
			c->ohead_lnum = lnum;
			c->ohead_offs = sleb->endpt;
		}
		ubifs_scan_destroy(sleb);

		if (err || outofdate)
			break;
	}

	return err;
}
/**
 * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them.
 * @c: UBIFS file-system description object
 * @unclean: indicates recovery from unclean unmount
 * @read_only: indicates read only mount
 *
 * Called at mount time. For a read-write mount the orphan buffer
 * (@c->orph_buf) is allocated first. After an unclean unmount the recorded
 * orphans are deleted via 'kill_orphans()'; otherwise, on a read-write mount,
 * 'ubifs_clear_orphans()' is called. Returns zero on success and a negative
 * error code on failure.
 */
int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only)
{
	c->max_orphans = tot_avail_orphs(c);

	if (!read_only) {
		c->orph_buf = vmalloc(c->leb_size);
		if (!c->orph_buf)
			return -ENOMEM;
	}

	if (unclean)
		return kill_orphans(c);
	if (read_only)
		return 0;
	return ubifs_clear_orphans(c);
}
/*
* Everything below is related to debugging.
*/
/*
 * One inode number read from the on-flash orphan area, stored in the check
 * tree that 'dbg_ins_check_orphan()' builds and 'dbg_find_check_orphan()'
 * searches.
 */
struct check_orphan {
struct rb_node rb; /* link in the red-black check tree */
ino_t inum; /* inode number; the tree is ordered by this value */
};
/* State carried through the orphan debugging check ('dbg_check_orphans()'). */
struct check_info {
unsigned long last_ino; /* inode number of the previously visited index leaf */
unsigned long tot_inos; /* how many times the inode number changed during the walk */
unsigned long missing; /* zero-nlink inodes found in neither orphan tree */
unsigned long long leaf_cnt; /* number of index leaves visited without error */
struct ubifs_ino_node *node; /* buffer 'ubifs_tnc_read_node()' reads inode nodes into */
struct rb_root root; /* root of the tree of 'struct check_orphan' entries */
};
/*
 * Look @inum up in the orphan tree of @c (c->orph_tree) while holding
 * c->orphan_lock. Returns 1 if it is present and 0 otherwise.
 */
static int dbg_find_orphan(struct ubifs_info *c, ino_t inum)
{
	struct rb_node *node;
	int found = 0;

	spin_lock(&c->orphan_lock);
	node = c->orph_tree.rb_node;
	while (node && !found) {
		struct ubifs_orphan *cur;

		cur = rb_entry(node, struct ubifs_orphan, rb);
		if (inum < cur->inum)
			node = node->rb_left;
		else if (inum > cur->inum)
			node = node->rb_right;
		else
			found = 1;
	}
	spin_unlock(&c->orphan_lock);

	return found;
}
/*
 * Insert @inum into the check tree @root. An inode number that is already in
 * the tree is silently ignored. Returns zero on success and %-ENOMEM if the
 * new tree entry cannot be allocated.
 */
static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct check_orphan *fresh;

	fresh = kzalloc(sizeof(struct check_orphan), GFP_NOFS);
	if (!fresh)
		return -ENOMEM;
	fresh->inum = inum;

	/* Descend to the leaf position where the new entry belongs */
	while (*link) {
		struct check_orphan *cur;

		parent = *link;
		cur = rb_entry(parent, struct check_orphan, rb);
		if (inum == cur->inum) {
			/* Duplicate - drop the entry we have just allocated */
			kfree(fresh);
			return 0;
		}
		if (inum < cur->inum)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&fresh->rb, parent, link);
	rb_insert_color(&fresh->rb, root);
	return 0;
}
/* Returns 1 if @inum is present in the check tree @root and 0 otherwise. */
static int dbg_find_check_orphan(struct rb_root *root, ino_t inum)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct check_orphan *cur;

		cur = rb_entry(node, struct check_orphan, rb);
		if (inum == cur->inum)
			return 1;
		node = inum < cur->inum ? node->rb_left : node->rb_right;
	}

	return 0;
}
static void dbg_free_check_tree(struct rb_root *root)
{
struct check_orphan *o, *n;
rbtree_postorder_for_each_entry_safe(o, n, root, rb)
kfree(o);
}
/*
 * Index-walk callback (passed to 'dbg_walk_index()' by 'dbg_check_orphans()').
 * @priv points to the 'struct check_info' of the walk. Whenever the inode
 * number of the visited leaf differs from the previous one, the node is read
 * and, if its link count is zero, it must be found in either the on-flash
 * check tree or the in-memory orphan tree - otherwise it is counted as
 * missing. Returns zero on success and the read error code on failure.
 */
static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
			    void *priv)
{
	struct check_info *ci = priv;
	ino_t inum = key_inum(c, &zbr->key);
	int err;

	/* Still within the same inode - just account for the leaf */
	if (inum == ci->last_ino) {
		ci->leaf_cnt += 1;
		return 0;
	}

	/* Lowest node type is the inode node, so it comes first */
	if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
		ubifs_err("found orphan node ino %lu, type %d",
			  (unsigned long)inum, key_type(c, &zbr->key));

	ci->last_ino = inum;
	ci->tot_inos += 1;

	err = ubifs_tnc_read_node(c, zbr, ci->node);
	if (err) {
		ubifs_err("node read failed, error %d", err);
		return err;
	}

	/* An inode with no links must be recorded as an orphan */
	if (ci->node->nlink == 0 &&
	    !dbg_find_check_orphan(&ci->root, inum) &&
	    !dbg_find_orphan(c, inum)) {
		ubifs_err("missing orphan, ino %lu", (unsigned long)inum);
		ci->missing += 1;
	}

	ci->leaf_cnt += 1;
	return 0;
}
/*
 * Add every inode number stored in the orphan nodes of the scanned LEB @sleb
 * to the check tree of @ci. Nodes of other types are skipped. Returns zero on
 * success and a negative error code if an insertion fails.
 */
static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb)
{
	struct ubifs_scan_node *snod;

	list_for_each_entry(snod, &sleb->nodes, list) {
		struct ubifs_orph_node *orph;
		int idx, cnt;

		cond_resched();
		if (snod->type != UBIFS_ORPH_NODE)
			continue;

		orph = snod->node;
		/* Each inode number takes 8 bytes after the node header */
		cnt = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
		for (idx = 0; idx < cnt; idx++) {
			ino_t inum = le64_to_cpu(orph->inos[idx]);
			int err = dbg_ins_check_orphan(&ci->root, inum);

			if (err)
				return err;
		}
	}

	return 0;
}
/*
 * Scan all LEBs of the orphan area and collect the inode numbers found there
 * in the check tree of @ci. Returns zero on success (also when the no-orphans
 * flag is set, or when the scan buffer cannot be allocated) and a negative
 * error code if scanning or collecting fails.
 */
static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
{
	void *scan_buf;
	int lnum, err = 0;

	/* Nothing to scan if the no-orphans flag is set */
	if (c->no_orphs)
		return 0;

	scan_buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
	if (!scan_buf) {
		/* The check is skipped rather than failed in this case */
		ubifs_err("cannot allocate memory to check orphans");
		return 0;
	}

	lnum = c->orph_first;
	while (!err && lnum <= c->orph_last) {
		struct ubifs_scan_leb *sleb;

		sleb = ubifs_scan(c, lnum, 0, scan_buf, 0);
		if (IS_ERR(sleb)) {
			err = PTR_ERR(sleb);
		} else {
			err = dbg_read_orphans(ci, sleb);
			ubifs_scan_destroy(sleb);
		}
		lnum++;
	}

	vfree(scan_buf);
	return err;
}
/*
 * Debugging check, performed only when 'dbg_is_chk_orph()' says so: collect
 * the inode numbers recorded in the orphan area, walk the index with
 * 'dbg_orphan_check()', and fail with %-EINVAL if any zero-nlink inode is not
 * recorded as an orphan. Returns zero on success and a negative error code
 * otherwise.
 */
static int dbg_check_orphans(struct ubifs_info *c)
{
	struct check_info ci;
	int err;

	if (!dbg_is_chk_orph(c))
		return 0;

	ci.last_ino = 0;
	ci.tot_inos = 0;
	ci.missing = 0;
	ci.leaf_cnt = 0;
	ci.root = RB_ROOT;
	ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
	if (!ci.node) {
		ubifs_err("out of memory");
		return -ENOMEM;
	}

	err = dbg_scan_orphans(c, &ci);
	if (!err) {
		err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci);
		if (err)
			ubifs_err("cannot scan TNC, error %d", err);
	}

	if (!err && ci.missing) {
		ubifs_err("%lu missing orphan(s)", ci.missing);
		err = -EINVAL;
	}

	if (!err) {
		dbg_cmt("last inode number is %lu", ci.last_ino);
		dbg_cmt("total number of inodes is %lu", ci.tot_inos);
		dbg_cmt("total number of leaf nodes is %llu", ci.leaf_cnt);
	}

	/* Common cleanup for the success and all failure paths */
	dbg_free_check_tree(&ci.root);
	kfree(ci.node);
	return err;
}

1553
fs/ubifs/recovery.c Normal file

File diff suppressed because it is too large Load diff

1069
fs/ubifs/replay.c Normal file

File diff suppressed because it is too large Load diff

809
fs/ubifs/sb.c Normal file
View file

@ -0,0 +1,809 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/*
* This file implements UBIFS superblock. The superblock is stored at the first
* LEB of the volume and is never changed by UBIFS. Only user-space tools may
* change it. The superblock node mostly contains geometry information.
*/
#include "ubifs.h"
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/math64.h>
/*
 * Default journal size in logical eraseblocks as a percent of total
 * flash size.
 */
#define DEFAULT_JNL_PERCENT 5
/* Default maximum journal size in bytes (32 MiB) */
#define DEFAULT_MAX_JNL (32*1024*1024)
/* Default indexing tree fanout */
#define DEFAULT_FANOUT 8
/* Default number of data journal heads */
#define DEFAULT_JHEADS_CNT 1
/*
 * Default positions of different LEBs in the main area, expressed as offsets
 * from the first main area LEB.
 */
#define DEFAULT_IDX_LEB 0
#define DEFAULT_DATA_LEB 1
#define DEFAULT_GC_LEB 2
/* Default number of LEB numbers in LPT's save table */
#define DEFAULT_LSAVE_CNT 256
/* Default reserved pool size as a percent of maximum free space */
#define DEFAULT_RP_PERCENT 5
/* The default maximum size of reserved pool in bytes (5 MiB) */
#define DEFAULT_MAX_RP_SIZE (5*1024*1024)
/* Default time granularity in nanoseconds (i.e. one second) */
#define DEFAULT_TIME_GRAN 1000000000
/**
 * create_default_filesystem - format empty UBI volume.
 * @c: UBIFS file-system description object
 *
 * This function creates default empty file-system: it calculates the default
 * geometry, creates the default LPT, and then writes the superblock, both
 * copies of the master node, the root indexing node, the root inode and a
 * commit start node at the beginning of the log. Returns zero in case of
 * success and a negative error code in case of failure.
 */
static int create_default_filesystem(struct ubifs_info *c)
{
	struct ubifs_sb_node *sup;
	struct ubifs_mst_node *mst;
	struct ubifs_idx_node *idx;
	struct ubifs_branch *br;
	struct ubifs_ino_node *ino;
	struct ubifs_cs_node *cs;
	union ubifs_key key;
	int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;
	int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
	int min_leb_cnt = UBIFS_MIN_LEB_CNT;
	long long tmp64, main_bytes;
	__le64 tmp_le64;

	/* Some functions called from here depend on the @c->key_len field */
	c->key_len = UBIFS_SK_LEN;

	/*
	 * First of all, we have to calculate default file-system geometry -
	 * log size, journal size, etc.
	 */
	if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT)
		/* We can first multiply then divide and have no overflow */
		jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100;
	else
		jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT;

	if (jnl_lebs < UBIFS_MIN_JNL_LEBS)
		jnl_lebs = UBIFS_MIN_JNL_LEBS;
	/*
	 * Compare in 64 bits: on very large volumes the journal size in bytes
	 * does not fit into 'int', and an overflowed product would defeat the
	 * DEFAULT_MAX_JNL cap.
	 */
	if ((long long)jnl_lebs * c->leb_size > DEFAULT_MAX_JNL)
		jnl_lebs = DEFAULT_MAX_JNL / c->leb_size;

	/*
	 * The log should be large enough to fit reference nodes for all bud
	 * LEBs. Because buds do not have to start from the beginning of LEBs
	 * (half of the LEB may contain committed data), the log should
	 * generally be larger, make it twice as large.
	 */
	tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1;
	log_lebs = tmp / c->leb_size;
	/* Plus one LEB reserved for commit */
	log_lebs += 1;
	if (c->leb_cnt - min_leb_cnt > 8) {
		/* And some extra space to allow writes while committing */
		log_lebs += 1;
		min_leb_cnt += 1;
	}

	max_buds = jnl_lebs - log_lebs;
	if (max_buds < UBIFS_MIN_BUD_LEBS)
		max_buds = UBIFS_MIN_BUD_LEBS;

	/*
	 * Orphan nodes are stored in a separate area. One node can store a lot
	 * of orphan inode numbers, but when new orphan comes we just add a new
	 * orphan node. At some point the nodes are consolidated into one
	 * orphan node.
	 */
	orph_lebs = UBIFS_MIN_ORPH_LEBS;
	if (c->leb_cnt - min_leb_cnt > 1)
		/*
		 * For debugging purposes it is better to have at least 2
		 * orphan LEBs, because the orphan subsystem would need to do
		 * consolidations and would be stressed more.
		 */
		orph_lebs += 1;

	main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs;
	main_lebs -= orph_lebs;

	lpt_first = UBIFS_LOG_LNUM + log_lebs;
	c->lsave_cnt = DEFAULT_LSAVE_CNT;
	c->max_leb_cnt = c->leb_cnt;
	err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs,
				    &big_lpt);
	if (err)
		return err;

	dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first,
		lpt_first + lpt_lebs - 1);

	main_first = c->leb_cnt - main_lebs;

	/* Create default superblock */
	tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
	sup = kzalloc(tmp, GFP_KERNEL);
	if (!sup)
		return -ENOMEM;

	tmp64 = (long long)max_buds * c->leb_size;
	if (big_lpt)
		sup_flags |= UBIFS_FLG_BIGLPT;

	sup->ch.node_type = UBIFS_SB_NODE;
	sup->key_hash = UBIFS_KEY_HASH_R5;
	sup->flags = cpu_to_le32(sup_flags);
	sup->min_io_size = cpu_to_le32(c->min_io_size);
	sup->leb_size = cpu_to_le32(c->leb_size);
	sup->leb_cnt = cpu_to_le32(c->leb_cnt);
	sup->max_leb_cnt = cpu_to_le32(c->max_leb_cnt);
	sup->max_bud_bytes = cpu_to_le64(tmp64);
	sup->log_lebs = cpu_to_le32(log_lebs);
	sup->lpt_lebs = cpu_to_le32(lpt_lebs);
	sup->orph_lebs = cpu_to_le32(orph_lebs);
	sup->jhead_cnt = cpu_to_le32(DEFAULT_JHEADS_CNT);
	sup->fanout = cpu_to_le32(DEFAULT_FANOUT);
	sup->lsave_cnt = cpu_to_le32(c->lsave_cnt);
	sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION);
	sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN);
	if (c->mount_opts.override_compr)
		sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);
	else
		sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);

	generate_random_uuid(sup->uuid);

	/* Reserved pool: a percentage of the main area, capped in bytes */
	main_bytes = (long long)main_lebs * c->leb_size;
	tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100);
	if (tmp64 > DEFAULT_MAX_RP_SIZE)
		tmp64 = DEFAULT_MAX_RP_SIZE;
	sup->rp_size = cpu_to_le64(tmp64);
	sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION);

	err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0);
	kfree(sup);
	if (err)
		return err;

	dbg_gen("default superblock created at LEB 0:0");

	/* Create default master node */
	mst = kzalloc(c->mst_node_alsz, GFP_KERNEL);
	if (!mst)
		return -ENOMEM;

	mst->ch.node_type = UBIFS_MST_NODE;
	mst->log_lnum = cpu_to_le32(UBIFS_LOG_LNUM);
	mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO);
	mst->cmt_no = 0;
	mst->root_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB);
	mst->root_offs = 0;
	tmp = ubifs_idx_node_sz(c, 1);
	mst->root_len = cpu_to_le32(tmp);
	mst->gc_lnum = cpu_to_le32(main_first + DEFAULT_GC_LEB);
	mst->ihead_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB);
	mst->ihead_offs = cpu_to_le32(ALIGN(tmp, c->min_io_size));
	mst->index_size = cpu_to_le64(ALIGN(tmp, 8));
	mst->lpt_lnum = cpu_to_le32(c->lpt_lnum);
	mst->lpt_offs = cpu_to_le32(c->lpt_offs);
	mst->nhead_lnum = cpu_to_le32(c->nhead_lnum);
	mst->nhead_offs = cpu_to_le32(c->nhead_offs);
	mst->ltab_lnum = cpu_to_le32(c->ltab_lnum);
	mst->ltab_offs = cpu_to_le32(c->ltab_offs);
	mst->lsave_lnum = cpu_to_le32(c->lsave_lnum);
	mst->lsave_offs = cpu_to_le32(c->lsave_offs);
	mst->lscan_lnum = cpu_to_le32(main_first);
	mst->empty_lebs = cpu_to_le32(main_lebs - 2);
	mst->idx_lebs = cpu_to_le32(1);
	mst->leb_cnt = cpu_to_le32(c->leb_cnt);

	/* Calculate lprops statistics */
	tmp64 = main_bytes;
	tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);
	tmp64 -= ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);
	mst->total_free = cpu_to_le64(tmp64);

	tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);
	ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) -
			  UBIFS_INO_NODE_SZ;
	tmp64 += ino_waste;
	tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8);
	mst->total_dirty = cpu_to_le64(tmp64);

	/* The indexing LEB does not contribute to dark space */
	tmp64 = ((long long)(c->main_lebs - 1) * c->dark_wm);
	mst->total_dark = cpu_to_le64(tmp64);

	mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ);

	/* The master node is written to two consecutive LEBs */
	err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0);
	if (err) {
		kfree(mst);
		return err;
	}
	err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1,
			       0);
	kfree(mst);
	if (err)
		return err;

	dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM);

	/* Create the root indexing node */
	tmp = ubifs_idx_node_sz(c, 1);
	idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);
	if (!idx)
		return -ENOMEM;

	c->key_fmt = UBIFS_SIMPLE_KEY_FMT;
	c->key_hash = key_r5_hash;

	idx->ch.node_type = UBIFS_IDX_NODE;
	idx->child_cnt = cpu_to_le16(1);
	ino_key_init(c, &key, UBIFS_ROOT_INO);
	br = ubifs_idx_branch(c, idx, 0);
	key_write_idx(c, &key, &br->key);
	br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB);
	br->len = cpu_to_le32(UBIFS_INO_NODE_SZ);
	err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0);
	kfree(idx);
	if (err)
		return err;

	dbg_gen("default root indexing node created LEB %d:0",
		main_first + DEFAULT_IDX_LEB);

	/* Create default root inode */
	tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);
	ino = kzalloc(tmp, GFP_KERNEL);
	if (!ino)
		return -ENOMEM;

	ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO);
	ino->ch.node_type = UBIFS_INO_NODE;
	ino->creat_sqnum = cpu_to_le64(++c->max_sqnum);
	ino->nlink = cpu_to_le32(2);
	tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);
	ino->atime_sec = tmp_le64;
	ino->ctime_sec = tmp_le64;
	ino->mtime_sec = tmp_le64;
	ino->atime_nsec = 0;
	ino->ctime_nsec = 0;
	ino->mtime_nsec = 0;
	ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO);
	ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ);

	/* Set compression enabled by default */
	ino->flags = cpu_to_le32(UBIFS_COMPR_FL);

	err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
			       main_first + DEFAULT_DATA_LEB, 0);
	kfree(ino);
	if (err)
		return err;

	dbg_gen("root inode created at LEB %d:0",
		main_first + DEFAULT_DATA_LEB);

	/*
	 * The first node in the log has to be the commit start node. This is
	 * always the case during normal file-system operation. Write a fake
	 * commit start node to the log.
	 */
	tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size);
	cs = kzalloc(tmp, GFP_KERNEL);
	if (!cs)
		return -ENOMEM;

	cs->ch.node_type = UBIFS_CS_NODE;
	err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, 0);
	kfree(cs);
	if (err)
		return err;

	ubifs_msg("default file-system created");
	return 0;
}
/**
 * validate_sb - validate superblock node.
 * @c: UBIFS file-system description object
 * @sup: superblock node
 *
 * Most superblock fields have already been copied to @c by the caller, so
 * the checks are mostly made against @c; only a few fields are taken from
 * @sup directly. Returns zero if the superblock is sane and %-EINVAL
 * otherwise, in which case the node is also dumped.
 */
static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
{
	unsigned int sb_min_io = le32_to_cpu(sup->min_io_size);
	unsigned int sb_leb_size = le32_to_cpu(sup->leb_size);
	unsigned int sb_time_gran = le32_to_cpu(sup->time_gran);
	long long max_bytes;
	int code = 1, lebs_needed;

	if (!c->key_hash) {
		code = 2;
		goto failed;
	}

	if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) {
		code = 3;
		goto failed;
	}

	if (sb_min_io != c->min_io_size) {
		ubifs_err("min. I/O unit mismatch: %d in superblock, %d real",
			  sb_min_io, c->min_io_size);
		goto failed;
	}

	if (sb_leb_size != c->leb_size) {
		ubifs_err("LEB size mismatch: %d in superblock, %d real",
			  sb_leb_size, c->leb_size);
		goto failed;
	}

	if (c->log_lebs < UBIFS_MIN_LOG_LEBS ||
	    c->lpt_lebs < UBIFS_MIN_LPT_LEBS ||
	    c->orph_lebs < UBIFS_MIN_ORPH_LEBS ||
	    c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
		code = 4;
		goto failed;
	}

	/*
	 * Work out the minimum allowed LEB count. This is very similar to
	 * %UBIFS_MIN_LEB_CNT, but uses the area sizes that have just been
	 * read from the superblock.
	 */
	lebs_needed = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs;
	lebs_needed += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;

	if (c->leb_cnt < lebs_needed || c->leb_cnt > c->vi.size) {
		ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required",
			  c->leb_cnt, c->vi.size, lebs_needed);
		goto failed;
	}

	if (c->max_leb_cnt < c->leb_cnt) {
		ubifs_err("max. LEB count %d less than LEB count %d",
			  c->max_leb_cnt, c->leb_cnt);
		goto failed;
	}

	if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
		ubifs_err("too few main LEBs count %d, must be at least %d",
			  c->main_lebs, UBIFS_MIN_MAIN_LEBS);
		goto failed;
	}

	/* The journal must hold at least the minimum number of buds ... */
	max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS;
	if (c->max_bud_bytes < max_bytes) {
		ubifs_err("too small journal (%lld bytes), must be at least %lld bytes",
			  c->max_bud_bytes, max_bytes);
		goto failed;
	}

	/* ... and must not be larger than the whole main area */
	max_bytes = (long long)c->leb_size * c->main_lebs;
	if (c->max_bud_bytes > max_bytes) {
		ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area",
			  c->max_bud_bytes, max_bytes);
		goto failed;
	}

	if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 ||
	    c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) {
		code = 9;
		goto failed;
	}

	if (c->fanout < UBIFS_MIN_FANOUT ||
	    ubifs_idx_node_sz(c, c->fanout) > c->leb_size) {
		code = 10;
		goto failed;
	}

	if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT &&
	    c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS -
	    c->log_lebs - c->lpt_lebs - c->orph_lebs)) {
		code = 11;
		goto failed;
	}

	/* All areas together must add up to exactly the LEB count */
	if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs +
	    c->orph_lebs + c->main_lebs != c->leb_cnt) {
		code = 12;
		goto failed;
	}

	if (c->default_compr >= UBIFS_COMPR_TYPES_CNT) {
		code = 13;
		goto failed;
	}

	/* At this point max_bytes is the size of the main area in bytes */
	if (c->rp_size < 0 || max_bytes < c->rp_size) {
		code = 14;
		goto failed;
	}

	if (sb_time_gran > 1000000000 || sb_time_gran < 1) {
		code = 15;
		goto failed;
	}

	return 0;

failed:
	ubifs_err("bad superblock, error %d", code);
	ubifs_dump_node(c, sup);
	return -EINVAL;
}
/**
 * ubifs_read_sb_node - read superblock node.
 * @c: UBIFS file-system description object
 *
 * Allocates a buffer and reads the superblock node into it. Returns a pointer
 * to the superblock node or an error pointer. Note, the caller is responsible
 * for kfree()'ing the returned superblock buffer.
 */
struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
{
	struct ubifs_sb_node *sup;
	int err;

	sup = kmalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_NOFS);
	if (!sup)
		return ERR_PTR(-ENOMEM);

	err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ,
			      UBIFS_SB_LNUM, 0);
	if (!err)
		return sup;

	kfree(sup);
	return ERR_PTR(err);
}
/**
 * ubifs_write_sb_node - write superblock node.
 * @c: UBIFS file-system description object
 * @sup: superblock node read with 'ubifs_read_sb_node()'
 *
 * Prepares the node and atomically changes the superblock LEB, writing the
 * node size aligned to the min. I/O unit. Returns %0 on success and a negative
 * error code on failure.
 */
int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup)
{
	int aligned_len;

	aligned_len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
	ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1);

	return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, aligned_len);
}
/**
* ubifs_read_superblock - read superblock.
* @c: UBIFS file-system description object
*
* This function finds, reads and checks the superblock. If an empty UBI volume
* is being mounted, this function creates default superblock. Returns zero in
* case of success, and a negative error code in case of failure.
*/
int ubifs_read_superblock(struct ubifs_info *c)
{
int err, sup_flags;
struct ubifs_sb_node *sup;
if (c->empty) {
err = create_default_filesystem(c);
if (err)
return err;
}
sup = ubifs_read_sb_node(c);
if (IS_ERR(sup))
return PTR_ERR(sup);
c->fmt_version = le32_to_cpu(sup->fmt_version);
c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);
/*
* The software supports all previous versions but not future versions,
* due to the unavailability of time-travelling equipment.
*/
if (c->fmt_version > UBIFS_FORMAT_VERSION) {
ubifs_assert(!c->ro_media || c->ro_mount);
if (!c->ro_mount ||
c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
c->fmt_version, c->ro_compat_version,
UBIFS_FORMAT_VERSION,
UBIFS_RO_COMPAT_VERSION);
if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
ubifs_msg("only R/O mounting is possible");
err = -EROFS;
} else
err = -EINVAL;
goto out;
}
/*
* The FS is mounted R/O, and the media format is
* R/O-compatible with the UBIFS implementation, so we can
* mount.
*/
c->rw_incompat = 1;
}
if (c->fmt_version < 3) {
ubifs_err("on-flash format version %d is not supported",
c->fmt_version);
err = -EINVAL;
goto out;
}
switch (sup->key_hash) {
case UBIFS_KEY_HASH_R5:
c->key_hash = key_r5_hash;
c->key_hash_type = UBIFS_KEY_HASH_R5;
break;
case UBIFS_KEY_HASH_TEST:
c->key_hash = key_test_hash;
c->key_hash_type = UBIFS_KEY_HASH_TEST;
break;
};
c->key_fmt = sup->key_fmt;
switch (c->key_fmt) {
case UBIFS_SIMPLE_KEY_FMT:
c->key_len = UBIFS_SK_LEN;
break;
default:
ubifs_err("unsupported key format");
err = -EINVAL;
goto out;
}
c->leb_cnt = le32_to_cpu(sup->leb_cnt);
c->max_leb_cnt = le32_to_cpu(sup->max_leb_cnt);
c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes);
c->log_lebs = le32_to_cpu(sup->log_lebs);
c->lpt_lebs = le32_to_cpu(sup->lpt_lebs);
c->orph_lebs = le32_to_cpu(sup->orph_lebs);
c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;
c->fanout = le32_to_cpu(sup->fanout);
c->lsave_cnt = le32_to_cpu(sup->lsave_cnt);
c->rp_size = le64_to_cpu(sup->rp_size);
c->rp_uid = make_kuid(&init_user_ns, le32_to_cpu(sup->rp_uid));
c->rp_gid = make_kgid(&init_user_ns, le32_to_cpu(sup->rp_gid));
sup_flags = le32_to_cpu(sup->flags);
if (!c->mount_opts.override_compr)
c->default_compr = le16_to_cpu(sup->default_compr);
c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
memcpy(&c->uuid, &sup->uuid, 16);
c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
/* Automatically increase file system size to the maximum size */
c->old_leb_cnt = c->leb_cnt;
if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
if (c->ro_mount)
dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
c->old_leb_cnt, c->leb_cnt);
else {
dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs",
c->old_leb_cnt, c->leb_cnt);
sup->leb_cnt = cpu_to_le32(c->leb_cnt);
err = ubifs_write_sb_node(c, sup);
if (err)
goto out;
c->old_leb_cnt = c->leb_cnt;
}
}
c->log_bytes = (long long)c->log_lebs * c->leb_size;
c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1;
c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs;
c->lpt_last = c->lpt_first + c->lpt_lebs - 1;
c->orph_first = c->lpt_last + 1;
c->orph_last = c->orph_first + c->orph_lebs - 1;
c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
c->main_first = c->leb_cnt - c->main_lebs;
err = validate_sb(c, sup);
out:
kfree(sup);
return err;
}
/**
 * fixup_leb - fixup/unmap an LEB containing free space.
 * @c: UBIFS file-system description object
 * @lnum: the LEB number to fix up
 * @len: number of used bytes in LEB (starting at offset 0)
 *
 * This function reads the first @len bytes of LEB @lnum and writes them back
 * with an atomic LEB change, so that empty min. I/O units in the end of LEB
 * are actually erased on flash (rather than being just all-0xff real data).
 * If the LEB is completely empty (@len is zero), it is simply unmapped.
 * Returns zero on success and a negative error code on failure.
 */
static int fixup_leb(struct ubifs_info *c, int lnum, int len)
{
	int err;

	ubifs_assert(len >= 0);
	ubifs_assert(len % c->min_io_size == 0);
	ubifs_assert(len < c->leb_size);

	if (len != 0) {
		dbg_mnt("fixup LEB %d, data len %d", lnum, len);
		err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1);
		return err ? err : ubifs_leb_change(c, lnum, c->sbuf, len);
	}

	dbg_mnt("unmap empty LEB %d", lnum);
	return ubifs_leb_unmap(c, lnum);
}
/**
 * fixup_free_space - find & remap all LEBs containing free space.
 * @c: UBIFS file-system description object
 *
 * This function walks through all LEBs in the filesystem and fixes up those
 * containing free/empty space. The areas are processed in this order: master,
 * log, LPT, orphans, main. Returns zero on success and a negative error code
 * on failure.
 */
static int fixup_free_space(struct ubifs_info *c)
{
	struct ubifs_lprops *lprops;
	int lnum, err = 0;

	ubifs_get_lprops(c);

	/* Fixup LEBs in the master area */
	for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
		err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
		if (err)
			goto out;
	}

	/* Unmap unused log LEBs */
	for (lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
	     lnum != c->ltail_lnum;
	     lnum = ubifs_next_log_lnum(c, lnum)) {
		err = fixup_leb(c, lnum, 0);
		if (err)
			goto out;
	}

	/*
	 * Fixup the log head, which contains only a CS node at the
	 * beginning.
	 */
	err = fixup_leb(c, c->lhead_lnum,
			ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
	if (err)
		goto out;

	/* Fixup LEBs in the LPT area */
	for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
		int free = c->ltab[lnum - c->lpt_first].free;

		if (free <= 0)
			continue;
		err = fixup_leb(c, lnum, c->leb_size - free);
		if (err)
			goto out;
	}

	/* Unmap LEBs in the orphans area */
	for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
		err = fixup_leb(c, lnum, 0);
		if (err)
			goto out;
	}

	/* Fixup LEBs in the main area */
	for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
		lprops = ubifs_lpt_lookup(c, lnum);
		if (IS_ERR(lprops)) {
			err = PTR_ERR(lprops);
			goto out;
		}

		if (lprops->free <= 0)
			continue;
		err = fixup_leb(c, lnum, c->leb_size - lprops->free);
		if (err)
			goto out;
	}

out:
	ubifs_release_lprops(c);
	return err;
}
/**
 * ubifs_fixup_free_space - find & fix all LEBs with free space.
 * @c: UBIFS file-system description object
 *
 * This function fixes up LEBs containing free space on first mount, if the
 * appropriate flag was set when the FS was created. Each LEB with one or more
 * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure
 * the free space is actually erased. E.g., this is necessary for some NAND
 * chips, since the free space may have been programmed like real "0xff" data
 * (generating a non-0xff ECC), causing future writes to the not-really-erased
 * NAND pages to behave badly. After the space is fixed up, the superblock flag
 * is cleared, so that this is skipped for all future mounts. Returns zero on
 * success and a negative error code on failure.
 */
int ubifs_fixup_free_space(struct ubifs_info *c)
{
	struct ubifs_sb_node *sup;
	int err;

	ubifs_assert(c->space_fixup);
	ubifs_assert(!c->ro_mount);

	ubifs_msg("start fixing up free space");

	err = fixup_free_space(c);
	if (err)
		return err;

	sup = ubifs_read_sb_node(c);
	if (IS_ERR(sup))
		return PTR_ERR(sup);

	/* Free-space fixup is no longer required */
	c->space_fixup = 0;
	sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);

	err = ubifs_write_sb_node(c, sup);
	kfree(sup);
	if (!err)
		ubifs_msg("free space fixup complete");

	return err;
}

379
fs/ubifs/scan.c Normal file
View file

@ -0,0 +1,379 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Adrian Hunter
* Artem Bityutskiy (Битюцкий Артём)
*/
/*
* This file implements the scan which is a general-purpose function for
* determining what nodes are in an eraseblock. The scan is used to replay the
* journal, to do garbage collection, for the TNC in-the-gaps method, and by
* debugging functions.
*/
#include "ubifs.h"
/**
 * scan_padding_bytes - scan for padding bytes.
 * @buf: buffer to scan
 * @len: length of buffer
 *
 * Counts the leading padding bytes of @buf, looking at no more than
 * %UBIFS_PAD_NODE_SZ bytes. Returns the number of padding bytes on success
 * and %SCANNED_GARBAGE if there are none or their count is not a multiple of
 * 8.
 */
static int scan_padding_bytes(void *buf, int len)
{
	const uint8_t *bytes = buf;
	int limit = min_t(int, UBIFS_PAD_NODE_SZ, len);
	int cnt;

	dbg_scan("not a node");

	for (cnt = 0; cnt < limit; cnt++)
		if (bytes[cnt] != UBIFS_PADDING_BYTE)
			break;

	if (cnt == 0 || cnt % 8 != 0)
		return SCANNED_GARBAGE;

	dbg_scan("%d padding bytes", cnt);

	return cnt;
}
/**
 * ubifs_scan_a_node - scan for a node or padding.
 * @c: UBIFS file-system description object
 * @buf: buffer to scan
 * @len: length of buffer
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 * @quiet: print no messages
 *
 * This function returns a scanning code to indicate what was scanned:
 * %SCANNED_EMPTY_SPACE, %SCANNED_GARBAGE, %SCANNED_A_CORRUPT_NODE,
 * %SCANNED_A_BAD_PAD_NODE or %SCANNED_A_NODE. For padding (raw padding bytes
 * or a valid padding node) the number of bytes to skip is returned instead.
 */
int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
		      int offs, int quiet)
{
	struct ubifs_ch *ch = buf;
	uint32_t magic;

	magic = le32_to_cpu(ch->magic);

	if (magic == 0xFFFFFFFF) {
		dbg_scan("hit empty space at LEB %d:%d", lnum, offs);
		return SCANNED_EMPTY_SPACE;
	}

	if (magic != UBIFS_NODE_MAGIC)
		return scan_padding_bytes(buf, len);

	if (len < UBIFS_CH_SZ)
		return SCANNED_GARBAGE;

	dbg_scan("scanning %s at LEB %d:%d",
		 dbg_ntype(ch->node_type), lnum, offs);

	if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
		return SCANNED_A_CORRUPT_NODE;

	if (ch->node_type == UBIFS_PAD_NODE) {
		struct ubifs_pad_node *pad = buf;
		int pad_len = le32_to_cpu(pad->pad_len);
		int node_len = le32_to_cpu(ch->len);

		/*
		 * Validate the padding node. The sum is calculated in 64 bits
		 * because @pad_len comes from the flash and a huge value must
		 * not overflow 'int' and slip through the bound check.
		 */
		if (pad_len < 0 ||
		    (long long)offs + node_len + pad_len > c->leb_size) {
			if (!quiet) {
				ubifs_err("bad pad node at LEB %d:%d",
					  lnum, offs);
				ubifs_dump_node(c, pad);
			}
			return SCANNED_A_BAD_PAD_NODE;
		}

		/* Make sure the node pads to an 8-byte boundary */
		if ((node_len + pad_len) & 7) {
			if (!quiet)
				ubifs_err("bad padding length %d - %d",
					  offs, offs + node_len + pad_len);
			return SCANNED_A_BAD_PAD_NODE;
		}

		dbg_scan("%d bytes padded at LEB %d:%d, offset now %d", pad_len,
			 lnum, offs, ALIGN(offs + node_len + pad_len, 8));

		return node_len + pad_len;
	}

	return SCANNED_A_NODE;
}
/**
* ubifs_start_scan - create LEB scanning information at start of scan.
* @c: UBIFS file-system description object
* @lnum: logical eraseblock number
* @offs: offset to start at (usually zero)
* @sbuf: scan buffer (must be c->leb_size)
*
* This function returns the scanned information on success and a negative error
* code on failure.
*/
struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
					int offs, void *sbuf)
{
	struct ubifs_scan_leb *sleb;
	int err;

	dbg_scan("scan LEB %d:%d", lnum, offs);

	sleb = kzalloc(sizeof(*sleb), GFP_NOFS);
	if (!sleb)
		return ERR_PTR(-ENOMEM);

	sleb->lnum = lnum;
	INIT_LIST_HEAD(&sleb->nodes);
	sleb->buf = sbuf;

	/* Read everything from @offs to the end of the LEB into the scan buffer */
	err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);

	/*
	 * Integrity errors (-EBADMSG) are deliberately ignored because all the
	 * nodes are protected by CRC checksums.
	 */
	if (!err || err == -EBADMSG)
		return sleb;

	ubifs_err("cannot read %d bytes from LEB %d:%d, error %d",
		  c->leb_size - offs, lnum, offs, err);
	kfree(sleb);
	return ERR_PTR(err);
}
/**
* ubifs_end_scan - update LEB scanning information at end of scan.
* @c: UBIFS file-system description object
* @sleb: scanning information
* @lnum: logical eraseblock number
* @offs: offset to start at (usually zero)
*/
void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
		    int lnum, int offs)
{
	/* @lnum is only needed for the debugging message */
	dbg_scan("stop scanning LEB %d at offset %d", lnum, offs);

	/* Callers are expected to stop on a min. I/O unit boundary */
	ubifs_assert(offs % c->min_io_size == 0);

	/*
	 * Record where the scanned data ends. When the assertion above holds
	 * the alignment is a no-op; it rounds the end point up otherwise.
	 */
	sleb->endpt = ALIGN(offs, c->min_io_size);
}
/**
* ubifs_add_snod - add a scanned node to LEB scanning information.
* @c: UBIFS file-system description object
* @sleb: scanning information
* @buf: buffer containing node
* @offs: offset of node on flash
*
* This function returns %0 on success and a negative error code on failure.
*/
int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
		   void *buf, int offs)
{
	struct ubifs_ch *hdr = buf;
	struct ubifs_ino_node *keyed = buf;
	struct ubifs_scan_node *snod;
	int has_key;

	snod = kmalloc(sizeof(*snod), GFP_NOFS);
	if (!snod)
		return -ENOMEM;

	/* Copy the interesting common header fields into the scan node */
	snod->sqnum = le64_to_cpu(hdr->sqnum);
	snod->type = hdr->node_type;
	snod->offs = offs;
	snod->len = le32_to_cpu(hdr->len);
	snod->node = buf;

	/*
	 * Inode, directory entry, extended attribute entry and data nodes
	 * carry a key, and it sits at the same place in all of them, so it
	 * can be read through the inode node layout.
	 */
	has_key = hdr->node_type == UBIFS_INO_NODE ||
		  hdr->node_type == UBIFS_DENT_NODE ||
		  hdr->node_type == UBIFS_XENT_NODE ||
		  hdr->node_type == UBIFS_DATA_NODE;

	if (has_key)
		key_read(c, &keyed->key, &snod->key);
	else
		invalid_key_init(c, &snod->key);

	list_add_tail(&snod->list, &sleb->nodes);
	sleb->nodes_cnt += 1;
	return 0;
}
/**
* ubifs_scanned_corruption - print information after UBIFS scanned corruption.
* @c: UBIFS file-system description object
* @lnum: LEB number of corruption
* @offs: offset of corruption
* @buf: buffer containing corruption
*/
void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
			      void *buf)
{
	int remaining, len;

	ubifs_err("corruption at LEB %d:%d", lnum, offs);

	/* Dump what is left of the LEB, but never more than 8192 bytes */
	remaining = c->leb_size - offs;
	len = remaining > 8192 ? 8192 : remaining;

	ubifs_err("first %d bytes from LEB %d:%d", len, lnum, offs);
	print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
}
/**
* ubifs_scan - scan a logical eraseblock.
* @c: UBIFS file-system description object
* @lnum: logical eraseblock number
* @offs: offset to start at (usually zero)
* @sbuf: scan buffer (must be of @c->leb_size bytes in size)
* @quiet: print no messages
*
* This function scans LEB number @lnum and returns complete information about
* its contents. Returns the scanned information in case of success,
* %-EUCLEAN if the LEB needs recovery, and other negative error codes in case
* of failure.
*
* If @quiet is non-zero, this function does not print large and scary
* error messages and flash dumps in case of errors.
*/
struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
int offs, void *sbuf, int quiet)
{
/*
 * 'buf', 'offs' and 'len' are advanced together: 'buf' points at LEB offset
 * 'offs' inside @sbuf and 'len' is the number of bytes left up to the end of
 * the LEB.
 */
void *buf = sbuf + offs;
int err, len = c->leb_size - offs;
struct ubifs_scan_leb *sleb;
/* Allocate the scan descriptor and read the LEB contents into @sbuf */
sleb = ubifs_start_scan(c, lnum, offs, sbuf);
if (IS_ERR(sleb))
return sleb;
/* Examine one node (or one run of padding) per iteration */
while (len >= 8) {
struct ubifs_ch *ch = buf;
int node_len, ret;
dbg_scan("look at LEB %d:%d (%d bytes left)",
lnum, offs, len);
cond_resched();
ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
if (ret > 0) {
/* Padding bytes or a valid padding node */
offs += ret;
buf += ret;
len -= ret;
continue;
}
if (ret == SCANNED_EMPTY_SPACE)
/* Empty space is checked later */
break;
/*
 * Only SCANNED_A_NODE leaves this switch normally; every other code
 * aborts the scan. Note that the messages printed here are not
 * suppressed by @quiet.
 */
switch (ret) {
case SCANNED_GARBAGE:
ubifs_err("garbage");
goto corrupted;
case SCANNED_A_NODE:
break;
case SCANNED_A_CORRUPT_NODE:
case SCANNED_A_BAD_PAD_NODE:
ubifs_err("bad node");
goto corrupted;
default:
ubifs_err("unknown");
err = -EINVAL;
goto error;
}
/* Record the node, then step over it (node lengths are 8-byte aligned) */
err = ubifs_add_snod(c, sleb, buf, offs);
if (err)
goto error;
node_len = ALIGN(le32_to_cpu(ch->len), 8);
offs += node_len;
buf += node_len;
len -= node_len;
}
/* The place where the nodes end must be aligned to the min. I/O unit size */
if (offs % c->min_io_size) {
if (!quiet)
ubifs_err("empty space starts at non-aligned offset %d",
offs);
goto corrupted;
}
/* Record the end point of the scanned data in 'sleb' */
ubifs_end_scan(c, sleb, lnum, offs);
/*
 * The rest of the LEB has to contain only 0xFF bytes. Skip over it 32 bits
 * at a time first; a mismatching word just ends this loop, and the byte-wise
 * loop below then locates and reports the offending byte.
 */
for (; len > 4; offs += 4, buf = buf + 4, len -= 4)
if (*(uint32_t *)buf != 0xffffffff)
break;
for (; len; offs++, buf++, len--)
if (*(uint8_t *)buf != 0xff) {
if (!quiet)
ubifs_err("corrupt empty space at LEB %d:%d",
lnum, offs);
goto corrupted;
}
return sleb;
/* Corruption: optionally dump the area, free the scan data, return -EUCLEAN */
corrupted:
if (!quiet) {
ubifs_scanned_corruption(c, lnum, offs, buf);
ubifs_err("LEB %d scanning failed", lnum);
}
err = -EUCLEAN;
ubifs_scan_destroy(sleb);
return ERR_PTR(err);
/* Any other failure: free the scan data and return the error code in 'err' */
error:
ubifs_err("LEB %d scanning failed, error %d", lnum, err);
ubifs_scan_destroy(sleb);
return ERR_PTR(err);
}
/**
* ubifs_scan_destroy - destroy LEB scanning information.
* @sleb: scanning information to free
*/
void ubifs_scan_destroy(struct ubifs_scan_leb *sleb)
{
	struct list_head *nodes = &sleb->nodes;
	struct ubifs_scan_node *snod;

	/* Unlink and free scanned nodes from the front until the list is empty */
	for (;;) {
		if (list_empty(nodes))
			break;

		snod = list_entry(nodes->next, struct ubifs_scan_node, list);
		list_del(&snod->list);
		kfree(snod);
	}

	/* Finally release the descriptor itself; the scan buffer is not freed */
	kfree(sleb);
}

331
fs/ubifs/shrinker.c Normal file
View file

@ -0,0 +1,331 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/*
* This file implements UBIFS shrinker which evicts clean znodes from the TNC
* tree when Linux VM needs more RAM.
*
* We do not implement any LRU lists to find oldest znodes to free because it
* would add additional overhead to the file system fast paths. So the shrinker
* just walks the TNC tree when searching for znodes to free.
*
* If the root of a TNC sub-tree is clean and old enough, then the children are
* also clean and old enough. So the shrinker walks the TNC in level order and
* dumps entire sub-trees.
*
* The age of znodes is just the time-stamp when they were last looked at.
* The current shrinker first tries to evict old znodes, then young ones.
*
* Since the shrinker is global, it has to protect against races with FS
* un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
*/
#include "ubifs.h"
/*
 * List of all UBIFS file-system instances. The shrinker code in this file
 * walks and re-orders it while holding 'ubifs_infos_lock'.
 */
LIST_HEAD(ubifs_infos);
/*
 * We number each shrinker run and record the number on the ubifs_info structure
 * so that we can easily work out which ubifs_info structures have already been
 * done by the current run. The counter is incremented under 'ubifs_infos_lock'
 * in 'shrink_tnc_trees()', which never uses the value zero as a run number.
 */
static unsigned int shrinker_run_no;
/* Protects 'ubifs_infos' list */
DEFINE_SPINLOCK(ubifs_infos_lock);
/* Global clean znode counter (for all mounted UBIFS instances) */
atomic_long_t ubifs_clean_zn_cnt;
/**
* shrink_tnc - shrink TNC tree.
* @c: UBIFS file-system description object
* @nr: number of znodes to free
* @age: the age of znodes to free
* @contention: if any contention, this is set to %1
*
* This function traverses TNC tree and frees clean znodes. It does not free
* clean znodes which are younger than @age. Returns the number of freed znodes.
*/
static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
{
int total_freed = 0;
struct ubifs_znode *znode, *zprev;
/* Current time in seconds, compared with 'znode->time' below */
int time = get_seconds();
/* The caller must hold both mutexes */
ubifs_assert(mutex_is_locked(&c->umount_mutex));
ubifs_assert(mutex_is_locked(&c->tnc_mutex));
/* Nothing to do if the TNC is not loaded or has no clean znodes */
if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
return 0;
/*
 * Traverse the TNC tree in level order, so that it is possible
 * to destroy large sub-trees. Indeed, if a znode is old, then all its
 * children are older or of the same age.
 *
 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
 * changed only when the 'c->tnc_mutex' is held.
 */
zprev = NULL;
znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
while (znode && total_freed < nr &&
atomic_long_read(&c->clean_zn_cnt) > 0) {
int freed;
/*
 * If the znode is clean, but it is in the 'c->cnext' list, this
 * means that this znode has just been written to flash as a
 * part of commit and was marked clean. They will be removed
 * from the list at end commit. We cannot change the list,
 * because it is not protected by any mutex (design decision to
 * make commit really independent and parallel to main I/O). So
 * we just skip these znodes.
 *
 * Note, the 'clean_zn_cnt' counters are not updated until
 * after the commit, so the UBIFS shrinker does not report
 * the znodes which are in the 'c->cnext' list as freeable.
 *
 * Also note, if the root of a sub-tree is not in 'c->cnext',
 * then the whole sub-tree is not in 'c->cnext' as well, so it
 * is safe to dump whole sub-tree.
 */
if (znode->cnext) {
/*
 * Very soon these znodes will be removed from the list
 * and become freeable.
 */
*contention = 1;
} else if (!ubifs_zn_dirty(znode) &&
abs(time - znode->time) >= age) {
/* Detach the sub-tree from its parent (or from the root) first */
if (znode->parent)
znode->parent->zbranch[znode->iip].znode = NULL;
else
c->zroot.znode = NULL;
/* Free the whole sub-tree and account for the clean znodes it held */
freed = ubifs_destroy_tnc_subtree(znode);
atomic_long_sub(freed, &ubifs_clean_zn_cnt);
atomic_long_sub(freed, &c->clean_zn_cnt);
total_freed += freed;
/* 'znode' is gone, so continue from the previously visited znode */
znode = zprev;
}
/* The root itself was freed, so there is nothing left to traverse */
if (unlikely(!c->zroot.znode))
break;
zprev = znode;
znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
cond_resched();
}
return total_freed;
}
/**
* shrink_tnc_trees - shrink UBIFS TNC trees.
* @nr: number of znodes to free
* @age: the age of znodes to free
* @contention: if any contention, this is set to %1
*
* This function walks the list of mounted UBIFS file-systems and frees clean
* znodes which are older than @age, until at least @nr znodes are freed.
* Returns the number of freed znodes.
*/
static int shrink_tnc_trees(int nr, int age, int *contention)
{
struct ubifs_info *c;
struct list_head *p;
unsigned int run_no;
int freed = 0;
spin_lock(&ubifs_infos_lock);
/*
 * Pick a new run number, skipping zero when the counter wraps.
 * NOTE(review): presumably zero is reserved so that a never-visited
 * 'c->shrinker_run_no' cannot match a run - confirm.
 */
do {
run_no = ++shrinker_run_no;
} while (run_no == 0);
/* Iterate over all mounted UBIFS file-systems and try to shrink them */
p = ubifs_infos.next;
while (p != &ubifs_infos) {
c = list_entry(p, struct ubifs_info, infos_list);
/*
 * We move the ones we do to the end of the list, so we stop
 * when we see one we have already done.
 */
if (c->shrinker_run_no == run_no)
break;
if (!mutex_trylock(&c->umount_mutex)) {
/* Some un-mount is in progress, try next FS */
*contention = 1;
p = p->next;
continue;
}
/*
 * We're holding 'c->umount_mutex', so the file-system won't go
 * away.
 */
if (!mutex_trylock(&c->tnc_mutex)) {
/* The TNC is busy: release the FS and try the next one */
mutex_unlock(&c->umount_mutex);
*contention = 1;
p = p->next;
continue;
}
/*
 * Drop the spinlock while shrinking: 'shrink_tnc()' calls
 * 'cond_resched()'. 'c' stays valid because 'c->umount_mutex' is held.
 */
spin_unlock(&ubifs_infos_lock);
/*
 * OK, now we have TNC locked, the file-system cannot go away -
 * it is safe to reap the cache.
 */
c->shrinker_run_no = run_no;
freed += shrink_tnc(c, nr, age, contention);
mutex_unlock(&c->tnc_mutex);
spin_lock(&ubifs_infos_lock);
/* Get the next list element before we move this one */
p = p->next;
/*
 * Move this one to the end of the list to provide some
 * fairness.
 */
list_move_tail(&c->infos_list, &ubifs_infos);
mutex_unlock(&c->umount_mutex);
/* Stop as soon as the requested number of znodes has been freed */
if (freed >= nr)
break;
}
spin_unlock(&ubifs_infos_lock);
return freed;
}
/**
* kick_a_thread - kick a background thread to start commit.
*
* This function kicks a background thread to start background commit. Returns
* %-1 if a thread was kicked or there is another reason to assume the memory
* will soon be freed or become freeable. If there are no dirty znodes, returns
* %0.
*/
static int kick_a_thread(void)
{
int i;
struct ubifs_info *c;
/*
 * Iterate over all mounted UBIFS file-systems and find out if there is
 * already an ongoing commit operation there. If no, then iterate for
 * the second time and initiate background commit.
 */
spin_lock(&ubifs_infos_lock);
/* i == 0: only look for a running commit; i == 1: request a commit */
for (i = 0; i < 2; i++) {
list_for_each_entry(c, &ubifs_infos, infos_list) {
long dirty_zn_cnt;
if (!mutex_trylock(&c->umount_mutex)) {
/*
 * Some un-mount is in progress, it will
 * certainly free memory, so just return.
 */
spin_unlock(&ubifs_infos_lock);
return -1;
}
dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
/*
 * Skip file-systems which have no dirty znodes, whose commit is
 * broken, or which are read-only (by mount or because of an error).
 */
if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
c->ro_mount || c->ro_error) {
mutex_unlock(&c->umount_mutex);
continue;
}
/* The commit state is not "resting", so nothing needs to be kicked */
if (c->cmt_state != COMMIT_RESTING) {
spin_unlock(&ubifs_infos_lock);
mutex_unlock(&c->umount_mutex);
return -1;
}
if (i == 1) {
/*
 * Second pass: move this FS to the end of the list, drop the
 * spinlock and request a background commit for it.
 */
list_move_tail(&c->infos_list, &ubifs_infos);
spin_unlock(&ubifs_infos_lock);
ubifs_request_bg_commit(c);
mutex_unlock(&c->umount_mutex);
return -1;
}
mutex_unlock(&c->umount_mutex);
}
}
/* No file-system with dirty znodes that could be committed was found */
spin_unlock(&ubifs_infos_lock);
return 0;
}
unsigned long ubifs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
{
long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
/*
* Due to the way UBIFS updates the clean znode counter it may
* temporarily be negative.
*/
return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
}
unsigned long ubifs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
unsigned long nr = sc->nr_to_scan;
int contention = 0;
unsigned long freed;
long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
if (!clean_zn_cnt) {
/*
* No clean znodes, nothing to reap. All we can do in this case
* is to kick background threads to start commit, which will
* probably make clean znodes which, in turn, will be freeable.
* And we return -1 which means will make VM call us again
* later.
*/
dbg_tnc("no clean znodes, kick a thread");
return kick_a_thread();
}
freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
if (freed >= nr)
goto out;
dbg_tnc("not enough old znodes, try to free young ones");
freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
if (freed >= nr)
goto out;
dbg_tnc("not enough young znodes, free all");
freed += shrink_tnc_trees(nr - freed, 0, &contention);
if (!freed && contention) {
dbg_tnc("freed nothing, but contention");
return SHRINK_STOP;
}
out:
dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
return freed;
}

2301
fs/ubifs/super.c Normal file

File diff suppressed because it is too large Load diff

3327
fs/ubifs/tnc.c Normal file

File diff suppressed because it is too large Load diff

1071
fs/ubifs/tnc_commit.c Normal file

File diff suppressed because it is too large Load diff

494
fs/ubifs/tnc_misc.c Normal file
View file

@ -0,0 +1,494 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Adrian Hunter
* Artem Bityutskiy (Битюцкий Артём)
*/
/*
* This file contains miscellaneous TNC-related functions shared between
* different files. This file does not form any logically separate TNC
* sub-system. The file was created because there is a lot of TNC code and
* putting it all in one file would make that file too big and unreadable.
*/
#include "ubifs.h"
/**
* ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal.
* @zr: root of the subtree to traverse
* @znode: previous znode
*
* This function implements levelorder TNC traversal. The LNC is ignored.
* Returns the next element or %NULL if @znode is already the last one.
*/
struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
struct ubifs_znode *znode)
{
/*
 * 'level' is the level currently being enumerated, 'iip' is the index (in
 * the parent of 'znode') of the last branch which was looked at, and
 * 'level_search' is set once the walk has moved down one level.
 */
int level, iip, level_search = 0;
struct ubifs_znode *zn;
ubifs_assert(zr);
/* The traversal starts with the root of the sub-tree */
if (unlikely(!znode))
return zr;
/* After the root comes its first loaded child, if the root has children */
if (unlikely(znode == zr)) {
if (znode->level == 0)
return NULL;
return ubifs_tnc_find_child(zr, 0);
}
level = znode->level;
iip = znode->iip;
while (1) {
ubifs_assert(znode->level <= zr->level);
/*
 * First walk up until there is a znode with next branch to
 * look at.
 */
while (znode->parent != zr && iip >= znode->parent->child_cnt) {
znode = znode->parent;
iip = znode->iip;
}
if (unlikely(znode->parent == zr &&
iip >= znode->parent->child_cnt)) {
/* This level is done, switch to the lower one */
level -= 1;
if (level_search || level < 0)
/*
 * We were already looking for znode at lower
 * level ('level_search'). As we are here
 * again, it just does not exist. Or all levels
 * were finished ('level < 0').
 */
return NULL;
level_search = 1;
/* Restart from the first child of the root, before its first branch */
iip = -1;
znode = ubifs_tnc_find_child(zr, 0);
ubifs_assert(znode);
}
/* Switch to the next index */
zn = ubifs_tnc_find_child(znode->parent, iip + 1);
if (!zn) {
/* No more children to look at, we have to walk up */
iip = znode->parent->child_cnt;
continue;
}
/* Walk back down to the level we came from ('level') */
while (zn->level != level) {
znode = zn;
zn = ubifs_tnc_find_child(zn, 0);
if (!zn) {
/*
 * This path is not too deep so it does not
 * reach 'level'. Try next path.
 */
iip = znode->iip;
break;
}
}
/* A znode at the wanted level was found - it is the next element */
if (zn) {
ubifs_assert(zn->level >= 0);
return zn;
}
}
}
/**
* ubifs_search_zbranch - search znode branch.
* @c: UBIFS file-system description object
* @znode: znode to search in
* @key: key to search for
* @n: znode branch slot number is returned here
*
* This is a helper function which search branch with key @key in @znode using
* binary search. The result of the search may be:
* o exact match, then %1 is returned, and the slot number of the branch is
* stored in @n;
* o no exact match, then %0 is returned and the slot number of the left
* closest branch is returned in @n; if all keys in this znode are
* greater than @key, then %-1 is returned in @n.
*/
int ubifs_search_zbranch(const struct ubifs_info *c,
			 const struct ubifs_znode *znode,
			 const union ubifs_key *key, int *n)
{
	const struct ubifs_zbranch *zbr = &znode->zbranch[0];
	int lo = 0, hi = znode->child_cnt;

	ubifs_assert(hi > lo);

	/* Binary search over the half-open slot range [lo, hi) */
	while (lo < hi) {
		int mid = (lo + hi) >> 1;
		int cmp = keys_cmp(c, key, &zbr[mid].key);

		if (cmp == 0) {
			/* Exact match */
			*n = mid;
			return 1;
		}

		if (cmp > 0)
			lo = mid + 1;
		else
			hi = mid;
	}

	/* No exact match: report the closest slot on the left, possibly -1 */
	*n = hi - 1;

	/* The insert point is after *n */
	ubifs_assert(*n >= -1 && *n < znode->child_cnt);
	if (*n == -1)
		ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0);
	else
		ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0);
	if (*n + 1 < znode->child_cnt)
		ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0);

	return 0;
}
/**
* ubifs_tnc_postorder_first - find first znode to do postorder tree traversal.
* @znode: znode to start at (root of the sub-tree to traverse)
*
* Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is
* ignored.
*/
struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode)
{
	struct ubifs_znode *next;

	if (unlikely(!znode))
		return NULL;

	/*
	 * Keep descending through the first loaded child until level zero is
	 * reached or the current znode has no loaded children.
	 */
	for (; znode->level > 0; znode = next) {
		next = ubifs_tnc_find_child(znode, 0);
		if (!next)
			break;
	}

	return znode;
}
/**
* ubifs_tnc_postorder_next - next TNC tree element in postorder traversal.
* @znode: previous znode
*
* This function implements postorder TNC traversal. The LNC is ignored.
* Returns the next element or %NULL if @znode is already the last one.
*/
struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode)
{
	struct ubifs_znode *parent, *sibling;

	ubifs_assert(znode);

	/* A znode without a parent is the last one in the traversal */
	parent = znode->parent;
	if (unlikely(!parent))
		return NULL;

	/* Look for the next loaded child of the parent after this one */
	sibling = ubifs_tnc_find_child(parent, znode->iip + 1);

	/*
	 * If there is one, continue with the first znode of its sub-tree;
	 * otherwise this was the last child and the parent comes next.
	 */
	return sibling ? ubifs_tnc_postorder_first(sibling) : parent;
}
/**
* ubifs_destroy_tnc_subtree - destroy all znodes connected to a subtree.
* @znode: znode defining subtree to destroy
*
* This function destroys subtree of the TNC tree. Returns number of clean
* znodes in the subtree.
*/
long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode)
{
	struct ubifs_znode *cur = ubifs_tnc_postorder_first(znode);
	long freed_clean = 0;
	int i;

	ubifs_assert(cur);

	/* Visit the sub-tree in postorder, freeing the children of each znode */
	for (;;) {
		for (i = 0; i < cur->child_cnt; i++) {
			if (!cur->zbranch[i].znode)
				continue;

			/* Only children of non-leaf-level znodes are counted */
			if (cur->level > 0 &&
			    !ubifs_zn_dirty(cur->zbranch[i].znode))
				freed_clean += 1;

			cond_resched();
			kfree(cur->zbranch[i].znode);
		}

		/* Back at the sub-tree root: the traversal is complete */
		if (cur == znode)
			break;

		cur = ubifs_tnc_postorder_next(cur);
	}

	/* The root of the sub-tree is freed last */
	if (!ubifs_zn_dirty(cur))
		freed_clean += 1;
	kfree(cur);

	return freed_clean;
}
/**
* read_znode - read an indexing node from flash and fill znode.
* @c: UBIFS file-system description object
* @lnum: LEB of the indexing node to read
* @offs: node offset
* @len: node length
* @znode: znode to read to
*
* This function reads an indexing node from the flash media and fills znode
* with the read data. Returns zero in case of success and a negative error
* code in case of failure. The read indexing node is validated and if anything
* is wrong with it, this function prints complaint messages and returns
* %-EINVAL.
*/
static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
struct ubifs_znode *znode)
{
/*
 * Positive values stored in 'err' below (1-7) only identify which check
 * failed in the error message; the function itself returns %-EINVAL.
 */
int i, err, type, cmp;
struct ubifs_idx_node *idx;
/* Temporary buffer for the on-flash indexing node, freed on every path */
idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
if (!idx)
return -ENOMEM;
err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
if (err < 0) {
kfree(idx);
return err;
}
znode->child_cnt = le16_to_cpu(idx->child_cnt);
znode->level = le16_to_cpu(idx->level);
dbg_tnc("LEB %d:%d, level %d, %d branch",
lnum, offs, znode->level, znode->child_cnt);
/* The branch count and the level must be within the supported limits */
if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) {
ubifs_err("current fanout %d, branch count %d",
c->fanout, znode->child_cnt);
ubifs_err("max levels %d, znode level %d",
UBIFS_MAX_LEVELS, znode->level);
err = 1;
goto out_dump;
}
/* Convert every on-flash branch to its in-memory form and validate it */
for (i = 0; i < znode->child_cnt; i++) {
const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
struct ubifs_zbranch *zbr = &znode->zbranch[i];
key_read(c, &br->key, &zbr->key);
zbr->lnum = le32_to_cpu(br->lnum);
zbr->offs = le32_to_cpu(br->offs);
zbr->len = le32_to_cpu(br->len);
zbr->znode = NULL;
/* Validate branch */
if (zbr->lnum < c->main_first ||
zbr->lnum >= c->leb_cnt || zbr->offs < 0 ||
zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) {
ubifs_err("bad branch %d", i);
err = 2;
goto out_dump;
}
/* Only inode, data, directory entry and xattr entry keys are valid */
switch (key_type(c, &zbr->key)) {
case UBIFS_INO_KEY:
case UBIFS_DATA_KEY:
case UBIFS_DENT_KEY:
case UBIFS_XENT_KEY:
break;
default:
ubifs_err("bad key type at slot %d: %d",
i, key_type(c, &zbr->key));
err = 3;
goto out_dump;
}
/* The target node length is checked only for level 0 znodes */
if (znode->level)
continue;
type = key_type(c, &zbr->key);
/*
 * A zero 'max_len' in 'c->ranges[]' means the length has to be exactly
 * 'len'; otherwise it has to be within 'min_len' - 'max_len'.
 */
if (c->ranges[type].max_len == 0) {
if (zbr->len != c->ranges[type].len) {
ubifs_err("bad target node (type %d) length (%d)",
type, zbr->len);
ubifs_err("have to be %d", c->ranges[type].len);
err = 4;
goto out_dump;
}
} else if (zbr->len < c->ranges[type].min_len ||
zbr->len > c->ranges[type].max_len) {
ubifs_err("bad target node (type %d) length (%d)",
type, zbr->len);
ubifs_err("have to be in range of %d-%d",
c->ranges[type].min_len,
c->ranges[type].max_len);
err = 5;
goto out_dump;
}
}
/*
 * Ensure that the next key is greater or equivalent to the
 * previous one.
 */
for (i = 0; i < znode->child_cnt - 1; i++) {
const union ubifs_key *key1, *key2;
key1 = &znode->zbranch[i].key;
key2 = &znode->zbranch[i + 1].key;
cmp = keys_cmp(c, key1, key2);
if (cmp > 0) {
ubifs_err("bad key order (keys %d and %d)", i, i + 1);
err = 6;
goto out_dump;
} else if (cmp == 0 && !is_hash_key(c, key1)) {
/* These can only be keys with colliding hash */
ubifs_err("keys %d and %d are not hashed but equivalent",
i, i + 1);
err = 7;
goto out_dump;
}
}
kfree(idx);
return 0;
/* Validation failed: report which check it was and dump the raw node */
out_dump:
ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
ubifs_dump_node(c, idx);
kfree(idx);
return -EINVAL;
}
/**
* ubifs_load_znode - load znode to TNC cache.
* @c: UBIFS file-system description object
* @zbr: znode branch
* @parent: znode's parent
* @iip: index in parent
*
* This function loads znode pointed to by @zbr into the TNC cache and
* returns pointer to it in case of success and a negative error code in case
* of failure.
*/
struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
				     struct ubifs_zbranch *zbr,
				     struct ubifs_znode *parent, int iip)
{
	struct ubifs_znode *znode;
	int err;

	/* The branch must not have a znode attached yet */
	ubifs_assert(!zbr->znode);

	/*
	 * A slab cache is not presently used for znodes because the znode size
	 * depends on the fanout which is stored in the superblock.
	 */
	znode = kzalloc(c->max_znode_sz, GFP_NOFS);
	if (!znode)
		return ERR_PTR(-ENOMEM);

	err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode);
	if (err) {
		kfree(znode);
		return ERR_PTR(err);
	}

	/*
	 * A freshly loaded znode is clean, so bump the per-FS counter and then
	 * the global one. It is OK that the two counters may be inconsistent
	 * for some short time (because we might be preempted between the two
	 * increments), the global one is only used in shrinker.
	 */
	atomic_long_inc(&c->clean_zn_cnt);
	atomic_long_inc(&ubifs_clean_zn_cnt);

	/* Attach the znode to its branch and record its place in the tree */
	zbr->znode = znode;
	znode->parent = parent;
	znode->time = get_seconds();
	znode->iip = iip;

	return znode;
}
/**
* ubifs_tnc_read_node - read a leaf node from the flash media.
* @c: UBIFS file-system description object
* @zbr: key and position of the node
* @node: node is returned here
*
* This function reads a node defined by @zbr from the flash media. Returns
* zero in case of success or a negative error code in case of
* failure.
*/
int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
void *node)
{
union ubifs_key key1, *key = &zbr->key;
int err, type = key_type(c, key);
struct ubifs_wbuf *wbuf;
/*
* 'zbr' has to point to on-flash node. The node may sit in a bud and
* may even be in a write buffer, so we have to take care about this.
*/
wbuf = ubifs_get_wbuf(c, zbr->lnum);
if (wbuf)
err = ubifs_read_node_wbuf(wbuf, node, type, zbr->len,
zbr->lnum, zbr->offs);
else
err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum,
zbr->offs);
if (err) {
dbg_tnck(key, "key ");
return err;
}
/* Make sure the key of the read node is correct */
key_read(c, node + UBIFS_KEY_OFFSET, &key1);
if (!keys_eq(c, key, &key1)) {
ubifs_err("bad key in node at LEB %d:%d",
zbr->lnum, zbr->offs);
dbg_tnck(key, "looked for key ");
dbg_tnck(&key1, "but found node's key ");
ubifs_dump_node(c, node);
return -EINVAL;
}
return 0;
}

784
fs/ubifs/ubifs-media.h Normal file
View file

@ -0,0 +1,784 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/*
* This file describes UBIFS on-flash format and contains definitions of all the
* relevant data structures and constants.
*
* All UBIFS on-flash objects are stored in the form of nodes. All nodes start
* with the UBIFS node magic number and have the same common header. Nodes
* always sit at 8-byte aligned positions on the media and node header sizes are
* also 8-byte aligned (except for the indexing node and the padding node).
*/
#ifndef __UBIFS_MEDIA_H__
#define __UBIFS_MEDIA_H__
/* UBIFS node magic number (must not have the padding byte first or last) */
#define UBIFS_NODE_MAGIC 0x06101831
/*
* UBIFS on-flash format version. This version is increased when the on-flash
* format is changing. If this happens, UBIFS will support older versions as
* well. But older UBIFS code will not support newer formats. Format changes
* will be rare and only when absolutely necessary, e.g. to fix a bug or to add
* a new feature.
*
* UBIFS went into mainline kernel with format version 4. The older formats
* were development formats.
*/
#define UBIFS_FORMAT_VERSION 4
/*
* Read-only compatibility version. If the UBIFS format is changed, older UBIFS
* implementations will not be able to mount newer formats in read-write mode.
* However, depending on the change, it may be possible to mount newer formats
* in R/O mode. This is indicated by the R/O compatibility version which is
* stored in the super-block.
*
* This is needed to support boot-loaders which only need R/O mounting. With
* this flag it is possible to do UBIFS format changes without a need to update
* boot-loaders.
*/
#define UBIFS_RO_COMPAT_VERSION 0
/* Minimum logical eraseblock size in bytes */
#define UBIFS_MIN_LEB_SZ (15*1024)
/* Initial CRC32 value used when calculating CRC checksums */
#define UBIFS_CRC32_INIT 0xFFFFFFFFU
/*
* UBIFS does not try to compress data if its length is less than the below
* constant.
*/
#define UBIFS_MIN_COMPR_LEN 128
/*
* If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes
* shorter than uncompressed data length, UBIFS prefers to leave this data
* node uncompressed, because it'll be read faster.
*/
#define UBIFS_MIN_COMPRESS_DIFF 64
/* Root inode number */
#define UBIFS_ROOT_INO 1
/* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */
#define UBIFS_FIRST_INO 64
/*
* Maximum file name and extended attribute length (must be a multiple of 8,
* minus 1). With the terminating zero byte this gives 256 bytes, see
* %UBIFS_MAX_DENT_NODE_SZ which adds "+ 1" for that reason.
*/
#define UBIFS_MAX_NLEN 255
/* Maximum number of data journal heads */
#define UBIFS_MAX_JHEADS 1
/*
* Size of UBIFS data block. Note, UBIFS is not a block oriented file-system,
* which means that it does not treat the underlying media as consisting of
* blocks like in case of hard drives. Do not be confused. UBIFS block is just
* the maximum amount of data which one data node can have or which can be
* attached to an inode node.
*
* %UBIFS_BLOCK_SHIFT is log2 of %UBIFS_BLOCK_SIZE (1 << 12 == 4096); the two
* constants have to be changed together.
*/
#define UBIFS_BLOCK_SIZE 4096
#define UBIFS_BLOCK_SHIFT 12
/* UBIFS padding byte pattern (must not be first or last byte of node magic) */
#define UBIFS_PADDING_BYTE 0xCE
/* Maximum possible key length in bytes */
#define UBIFS_MAX_KEY_LEN 16
/* Key length in bytes ("simple" format) */
#define UBIFS_SK_LEN 8
/* Minimum index tree fanout */
#define UBIFS_MIN_FANOUT 3
/* Maximum number of levels in UBIFS indexing B-tree */
#define UBIFS_MAX_LEVELS 512
/*
* Maximum amount of data attached to an inode in bytes. This is also what
* limits the size of an extended attribute value.
*/
#define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE
/*
* LEB Properties Tree fanout (must be power of 2) and fanout shift.
* %UBIFS_LPT_FANOUT_SHIFT is log2 of %UBIFS_LPT_FANOUT (1 << 2 == 4).
*/
#define UBIFS_LPT_FANOUT 4
#define UBIFS_LPT_FANOUT_SHIFT 2
/*
* LEB Properties Tree bit field sizes. %UBIFS_LPT_CRC_BYTES is
* %UBIFS_LPT_CRC_BITS expressed in bytes (16 / 8). %UBIFS_LPT_TYPE_BITS also
* defines %UBIFS_LPT_NOT_A_NODE (all type bits set).
*/
#define UBIFS_LPT_CRC_BITS 16
#define UBIFS_LPT_CRC_BYTES 2
#define UBIFS_LPT_TYPE_BITS 4
/* The key is always at the same position in all keyed nodes */
#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key)
/* Garbage collector journal head number */
#define UBIFS_GC_HEAD 0
/* Base journal head number */
#define UBIFS_BASE_HEAD 1
/* Data journal head number */
#define UBIFS_DATA_HEAD 2
/*
 * Types of nodes stored in the LEB Properties Tree (LPT) area.
 *
 * UBIFS_LPT_PNODE: leaf node of the LPT, it carries the LEB properties
 * UBIFS_LPT_NNODE: internal node of the LPT
 * UBIFS_LPT_LTAB: lprops table describing the LPT area itself
 * UBIFS_LPT_LSAVE: save table of the LPT (big model only)
 * UBIFS_LPT_NODE_CNT: how many LPT node types exist
 * UBIFS_LPT_NOT_A_NODE: every type bit set (15 with 4 type bits), which is
 *                       never used as a valid node type
 */
enum {
	UBIFS_LPT_PNODE		= 0,
	UBIFS_LPT_NNODE		= 1,
	UBIFS_LPT_LTAB		= 2,
	UBIFS_LPT_LSAVE		= 3,
	UBIFS_LPT_NODE_CNT	= 4,
	UBIFS_LPT_NOT_A_NODE	= (1 << UBIFS_LPT_TYPE_BITS) - 1,
};
/*
 * File types UBIFS can store for an inode.
 *
 * UBIFS_ITYPE_REG: regular file
 * UBIFS_ITYPE_DIR: directory
 * UBIFS_ITYPE_LNK: soft link
 * UBIFS_ITYPE_BLK: block device node
 * UBIFS_ITYPE_CHR: character device node
 * UBIFS_ITYPE_FIFO: fifo
 * UBIFS_ITYPE_SOCK: socket
 * UBIFS_ITYPES_CNT: number of file types listed above
 */
enum {
	UBIFS_ITYPE_REG		= 0,
	UBIFS_ITYPE_DIR		= 1,
	UBIFS_ITYPE_LNK		= 2,
	UBIFS_ITYPE_BLK		= 3,
	UBIFS_ITYPE_CHR		= 4,
	UBIFS_ITYPE_FIFO	= 5,
	UBIFS_ITYPE_SOCK	= 6,
	UBIFS_ITYPES_CNT	= 7,
};
/*
 * Hash functions which may be used for keys.
 *
 * UBIFS_KEY_HASH_R5: R5 hash
 * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name
 */
enum {
	UBIFS_KEY_HASH_R5	= 0,
	UBIFS_KEY_HASH_TEST	= 1,
};
/*
 * Key formats known to UBIFS.
 *
 * UBIFS_SIMPLE_KEY_FMT: simple key format (the only one defined)
 */
enum {
	UBIFS_SIMPLE_KEY_FMT = 0,
};
/*
* The simple key format uses 29 bits for storing UBIFS block number and hash
* value. The masks select exactly those low 29 bits (0x1FFFFFFF is
* 2^29 - 1), and the hash constants are aliases of the block constants, so
* the bit count and the mask have to be changed together.
*/
#define UBIFS_S_KEY_BLOCK_BITS 29
#define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF
#define UBIFS_S_KEY_HASH_BITS UBIFS_S_KEY_BLOCK_BITS
#define UBIFS_S_KEY_HASH_MASK UBIFS_S_KEY_BLOCK_MASK
/*
 * Types of keys.
 *
 * UBIFS_INO_KEY: key of an inode node
 * UBIFS_DATA_KEY: key of a data node
 * UBIFS_DENT_KEY: key of a directory entry node
 * UBIFS_XENT_KEY: key of an extended attribute entry
 * UBIFS_KEY_TYPES_CNT: how many key types are supported
 */
enum {
	UBIFS_INO_KEY		= 0,
	UBIFS_DATA_KEY		= 1,
	UBIFS_DENT_KEY		= 2,
	UBIFS_XENT_KEY		= 3,
	UBIFS_KEY_TYPES_CNT	= 4,
};
/* Count of LEBs reserved for the superblock area */
#define UBIFS_SB_LEBS 1
/* Count of LEBs reserved for the master area */
#define UBIFS_MST_LEBS 2
/* First LEB of the superblock area */
#define UBIFS_SB_LNUM 0
/* First LEB of the master area (evaluates to 1: right after the superblock) */
#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS)
/* First LEB of the log area (evaluates to 3: right after the master area) */
#define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS)
/*
* The below constants define the absolute minimum values for various UBIFS
* media areas. Many of them actually depend on flash geometry and the FS
* configuration (number of journal heads, orphan LEBs, etc). This means that
* the smallest volume size which can be used for UBIFS cannot be pre-defined
* by these constants. A file-system that meets the below limits will not
* necessarily mount. UBIFS does run-time calculations and validates the FS
* size.
*/
/* Minimum number of logical eraseblocks in the log */
#define UBIFS_MIN_LOG_LEBS 2
/*
* Minimum number of bud logical eraseblocks (one for each head: GC, base and
* data heads)
*/
#define UBIFS_MIN_BUD_LEBS 3
/* Minimum number of journal logical eraseblocks (2 + 3 = 5) */
#define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS)
/* Minimum number of LPT area logical eraseblocks */
#define UBIFS_MIN_LPT_LEBS 2
/* Minimum number of orphan area logical eraseblocks */
#define UBIFS_MIN_ORPH_LEBS 1
/*
* Minimum number of main area logical eraseblocks (buds, 3 for the index, 1
* for GC, 1 for deletions, and at least 1 for committed data), i.e. 3 + 6 = 9.
*/
#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6)
/* Minimum number of logical eraseblocks (1 + 2 + 2 + 2 + 1 + 9 = 17) */
#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \
UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \
UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS)
/*
* Node sizes (N.B. these are guaranteed to be multiples of 8).
*
* NOTE(review): as declared in this file, struct ubifs_pad_node and struct
* ubifs_idx_node both add up to 28 bytes (24-byte common header plus 4 bytes),
* which is not a multiple of 8 - presumably the guarantee is about the full
* on-flash node after alignment rather than about these header sizes; confirm.
*/
#define UBIFS_CH_SZ sizeof(struct ubifs_ch)
#define UBIFS_INO_NODE_SZ sizeof(struct ubifs_ino_node)
#define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node)
#define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node)
#define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node)
#define UBIFS_PAD_NODE_SZ sizeof(struct ubifs_pad_node)
#define UBIFS_SB_NODE_SZ sizeof(struct ubifs_sb_node)
#define UBIFS_MST_NODE_SZ sizeof(struct ubifs_mst_node)
#define UBIFS_REF_NODE_SZ sizeof(struct ubifs_ref_node)
#define UBIFS_IDX_NODE_SZ sizeof(struct ubifs_idx_node)
#define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node)
#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node)
/* Extended attribute entry nodes are identical to directory entry nodes */
#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ
/* Only this does not have to be multiple of 8 bytes */
#define UBIFS_BRANCH_SZ sizeof(struct ubifs_branch)
/*
* Maximum node sizes (N.B. these are guaranteed to be multiples of 8): the
* fixed node header plus the largest payload the node may carry. For
* directory and extended attribute entries the payload is the longest name
* plus its terminating zero byte.
*/
#define UBIFS_MAX_DATA_NODE_SZ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE)
#define UBIFS_MAX_INO_NODE_SZ (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA)
#define UBIFS_MAX_DENT_NODE_SZ (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1)
#define UBIFS_MAX_XENT_NODE_SZ UBIFS_MAX_DENT_NODE_SZ
/* The largest UBIFS node */
#define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ
/*
 * Inode flags as they are stored on the flash media.
 *
 * UBIFS_COMPR_FL: use compression for this inode
 * UBIFS_SYNC_FL: I/O on this inode has to be synchronous
 * UBIFS_IMMUTABLE_FL: inode is immutable
 * UBIFS_APPEND_FL: writes to the inode may only append data
 * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous
 * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value
 *
 * Note, these are on-flash flags which correspond to ioctl flags
 * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not
 * have to be the same.
 */
enum {
	UBIFS_COMPR_FL		= 1 << 0,
	UBIFS_SYNC_FL		= 1 << 1,
	UBIFS_IMMUTABLE_FL	= 1 << 2,
	UBIFS_APPEND_FL		= 1 << 3,
	UBIFS_DIRSYNC_FL	= 1 << 4,
	UBIFS_XATTR_FL		= 1 << 5,
};
/*
* Inode flag bits used by UBIFS. The mask covers the five lowest flags
* (%UBIFS_COMPR_FL through %UBIFS_DIRSYNC_FL); %UBIFS_XATTR_FL (0x20) is not
* part of it.
*/
#define UBIFS_FL_MASK 0x0000001F
/*
 * Compression algorithms known to UBIFS.
 *
 * UBIFS_COMPR_NONE: data is not compressed
 * UBIFS_COMPR_LZO: LZO compression
 * UBIFS_COMPR_ZLIB: ZLIB compression
 * UBIFS_COMPR_TYPES_CNT: how many compression types are supported
 */
enum {
	UBIFS_COMPR_NONE	= 0,
	UBIFS_COMPR_LZO		= 1,
	UBIFS_COMPR_ZLIB	= 2,
	UBIFS_COMPR_TYPES_CNT	= 3,
};
/*
 * Types of UBIFS nodes.
 *
 * UBIFS_INO_NODE: inode node
 * UBIFS_DATA_NODE: data node
 * UBIFS_DENT_NODE: directory entry node
 * UBIFS_XENT_NODE: extended attribute node
 * UBIFS_TRUN_NODE: truncation node
 * UBIFS_PAD_NODE: padding node
 * UBIFS_SB_NODE: superblock node
 * UBIFS_MST_NODE: master node
 * UBIFS_REF_NODE: LEB reference node
 * UBIFS_IDX_NODE: index node
 * UBIFS_CS_NODE: commit start node
 * UBIFS_ORPH_NODE: orphan node
 * UBIFS_NODE_TYPES_CNT: how many node types are supported
 *
 * Note, arrays are indexed by these numbers, so they have to stay low and
 * contiguous. The first four constants (inode, data, direntry, xentry) have
 * to be equal to the corresponding key type constants.
 */
enum {
	UBIFS_INO_NODE		= 0,
	UBIFS_DATA_NODE		= 1,
	UBIFS_DENT_NODE		= 2,
	UBIFS_XENT_NODE		= 3,
	UBIFS_TRUN_NODE		= 4,
	UBIFS_PAD_NODE		= 5,
	UBIFS_SB_NODE		= 6,
	UBIFS_MST_NODE		= 7,
	UBIFS_REF_NODE		= 8,
	UBIFS_IDX_NODE		= 9,
	UBIFS_CS_NODE		= 10,
	UBIFS_ORPH_NODE		= 11,
	UBIFS_NODE_TYPES_CNT	= 12,
};
/*
 * Flags stored in the master node.
 *
 * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty
 * UBIFS_MST_NO_ORPHS: no orphan inodes present
 * UBIFS_MST_RCVRY: written by recovery
 */
enum {
	UBIFS_MST_DIRTY		= 1 << 0,
	UBIFS_MST_NO_ORPHS	= 1 << 1,
	UBIFS_MST_RCVRY		= 1 << 2,
};
/*
 * Node group membership (recovery uses it to recover either a whole group
 * of nodes or none of them).
 *
 * UBIFS_NO_NODE_GROUP: the node does not belong to any group
 * UBIFS_IN_NODE_GROUP: the node belongs to a group
 * UBIFS_LAST_OF_NODE_GROUP: the node is the last one of its group
 */
enum {
	UBIFS_NO_NODE_GROUP		= 0,
	UBIFS_IN_NODE_GROUP		= 1,
	UBIFS_LAST_OF_NODE_GROUP	= 2,
};
/*
 * Flags stored in the superblock.
 *
 * UBIFS_FLG_BIGLPT: set if the "big" LPT model is used
 * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
 */
enum {
	UBIFS_FLG_BIGLPT	= 1 << 1,
	UBIFS_FLG_SPACE_FIXUP	= 1 << 2,
};
/**
* struct ubifs_ch - common header node.
* @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC)
* @crc: CRC-32 checksum of the node header
* @sqnum: sequence number
* @len: full node length
* @node_type: node type
* @group_type: node group type
* @padding: reserved for future, zeroes
*
* Every UBIFS node starts with this common part. If the node has a key, the
* key always goes next.
*
* The structure is packed and its fields add up to 24 bytes. All multi-byte
* fields are little-endian.
*
* NOTE(review): @crc is described as covering "the node header"; which bytes
* are actually checksummed is decided by the I/O code, not visible here -
* confirm before relying on this wording.
*/
struct ubifs_ch {
__le32 magic;
__le32 crc;
__le64 sqnum;
__le32 len;
__u8 node_type;
__u8 group_type;
__u8 padding[2];
} __packed;
/**
* union ubifs_dev_desc - device node descriptor.
* @new: new type device descriptor
* @huge: huge type device descriptor
*
* This data structure describes major/minor numbers of a device node. If an
* inode is a device node then its data contains an object of this type. UBIFS
* uses standard Linux "new" and "huge" device node encodings.
*
* Being a union, only one of the two encodings is stored at a time; @new
* occupies 4 bytes and @huge 8 bytes.
*/
union ubifs_dev_desc {
__le32 new;
__le64 huge;
} __packed;
/**
* struct ubifs_ino_node - inode node.
* @ch: common header
* @key: node key
* @creat_sqnum: sequence number at time of creation
* @size: inode size in bytes (amount of uncompressed data)
* @atime_sec: access time seconds
* @ctime_sec: creation time seconds
* @mtime_sec: modification time seconds
* @atime_nsec: access time nanoseconds
* @ctime_nsec: creation time nanoseconds
* @mtime_nsec: modification time nanoseconds
* @nlink: number of hard links
* @uid: owner ID
* @gid: group ID
* @mode: access flags
* @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc)
* @data_len: inode data length
* @xattr_cnt: count of extended attributes this inode has
* @xattr_size: summarized size of all extended attributes in bytes
* @padding1: reserved for future, zeroes
* @xattr_names: sum of lengths of all extended attribute names belonging to
* this inode
* @compr_type: compression type used for this inode
* @padding2: reserved for future, zeroes
* @data: data attached to the inode
*
* Note, even though inode compression type is defined by @compr_type, some
* nodes of this inode may be compressed with different compressor - this
* happens if compression type is changed while the inode already has data
* nodes. But @compr_type will be used for further writes to the inode.
*
* Note, do not forget to amend 'zero_ino_node_unused()' function when changing
* the padding fields.
*
* The fixed part of the node (everything before @data) adds up to 160 bytes;
* @data is a flexible array member holding up to %UBIFS_MAX_INO_DATA bytes
* (see %UBIFS_MAX_INO_NODE_SZ).
*
* NOTE(review): @ctime_sec/@ctime_nsec are documented as "creation time";
* presumably they hold the inode change time like the VFS 'i_ctime' - confirm.
*/
struct ubifs_ino_node {
struct ubifs_ch ch;
__u8 key[UBIFS_MAX_KEY_LEN];
__le64 creat_sqnum;
__le64 size;
__le64 atime_sec;
__le64 ctime_sec;
__le64 mtime_sec;
__le32 atime_nsec;
__le32 ctime_nsec;
__le32 mtime_nsec;
__le32 nlink;
__le32 uid;
__le32 gid;
__le32 mode;
__le32 flags;
__le32 data_len;
__le32 xattr_cnt;
__le32 xattr_size;
__u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */
__le32 xattr_names;
__le16 compr_type;
__u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
__u8 data[];
} __packed;
/**
* struct ubifs_dent_node - directory entry node.
* @ch: common header
* @key: node key
* @inum: target inode number
* @padding1: reserved for future, zeroes
* @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc)
* @nlen: name length
* @padding2: reserved for future, zeroes
* @name: zero-terminated name
*
* Note, do not forget to amend 'zero_dent_node_unused()' function when
* changing the padding fields.
*
* The same layout is used for extended attribute entries (see
* %UBIFS_XENT_NODE_SZ). The fixed part (everything before @name) adds up to
* 56 bytes; @name is a flexible array member of at most %UBIFS_MAX_NLEN
* characters plus the terminating zero.
*/
struct ubifs_dent_node {
struct ubifs_ch ch;
__u8 key[UBIFS_MAX_KEY_LEN];
__le64 inum;
__u8 padding1;
__u8 type;
__le16 nlen;
__u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
__u8 name[];
} __packed;
/**
* struct ubifs_data_node - data node.
* @ch: common header
* @key: node key
* @size: uncompressed data size in bytes
* @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc)
* @padding: reserved for future, zeroes
* @data: data
*
* Note, do not forget to amend 'zero_data_node_unused()' function when
* changing the padding fields.
*
* The fixed part (everything before @data) adds up to 48 bytes; @data is a
* flexible array member holding at most %UBIFS_BLOCK_SIZE bytes (see
* %UBIFS_MAX_DATA_NODE_SZ).
*/
struct ubifs_data_node {
struct ubifs_ch ch;
__u8 key[UBIFS_MAX_KEY_LEN];
__le32 size;
__le16 compr_type;
__u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
__u8 data[];
} __packed;
/**
* struct ubifs_trun_node - truncation node.
* @ch: common header
* @inum: truncated inode number
* @padding: reserved for future, zeroes
* @old_size: size before truncation
* @new_size: size after truncation
*
* This node exists only in the journal and never goes to the main area. Note,
* do not forget to amend 'zero_trun_node_unused()' function when changing the
* padding fields.
*
* Unlike the keyed nodes, this node has no key after the common header and
* stores the inode number in 32 bits. The fields add up to 56 bytes.
*/
struct ubifs_trun_node {
struct ubifs_ch ch;
__le32 inum;
__u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
__le64 old_size;
__le64 new_size;
} __packed;
/**
* struct ubifs_pad_node - padding node.
* @ch: common header
* @pad_len: how many bytes after this node are unused (because padded)
*
* The node consists of the common header and @pad_len only (28 bytes); there
* is no reserved padding field in this structure.
*/
struct ubifs_pad_node {
struct ubifs_ch ch;
__le32 pad_len;
} __packed;
/**
* struct ubifs_sb_node - superblock node.
* @ch: common header
* @padding: reserved for future, zeroes
* @key_hash: type of hash function used in keys
* @key_fmt: format of the key
* @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc)
* @min_io_size: minimal input/output unit size
* @leb_size: logical eraseblock size in bytes
* @leb_cnt: count of LEBs used by file-system
* @max_leb_cnt: maximum count of LEBs used by file-system
* @max_bud_bytes: maximum amount of data stored in buds
* @log_lebs: log size in logical eraseblocks
* @lpt_lebs: number of LEBs used for lprops table
* @orph_lebs: number of LEBs used for recording orphans
* @jhead_cnt: count of journal heads
* @fanout: tree fanout (max. number of links per indexing node)
* @lsave_cnt: number of LEB numbers in LPT's save table
* @fmt_version: UBIFS on-flash format version
* @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
* @padding1: reserved for future, zeroes
* @rp_uid: reserve pool UID
* @rp_gid: reserve pool GID
* @rp_size: size of the reserved pool in bytes
* @time_gran: time granularity in nanoseconds
* @uuid: UUID generated when the file system image was created
* @ro_compat_version: UBIFS R/O compatibility version
* @padding2: reserved for future, zeroes
*
* The named fields take 128 bytes and @padding2 fills the node up to 4096
* bytes, so a new field has to be carved out of @padding2 (shrinking it by
* the field size) to keep the node size unchanged.
*/
struct ubifs_sb_node {
struct ubifs_ch ch;
__u8 padding[2];
__u8 key_hash;
__u8 key_fmt;
__le32 flags;
__le32 min_io_size;
__le32 leb_size;
__le32 leb_cnt;
__le32 max_leb_cnt;
__le64 max_bud_bytes;
__le32 log_lebs;
__le32 lpt_lebs;
__le32 orph_lebs;
__le32 jhead_cnt;
__le32 fanout;
__le32 lsave_cnt;
__le32 fmt_version;
__le16 default_compr;
__u8 padding1[2];
__le32 rp_uid;
__le32 rp_gid;
__le64 rp_size;
__le32 time_gran;
__u8 uuid[16];
__le32 ro_compat_version;
__u8 padding2[3968];
} __packed;
/**
* struct ubifs_mst_node - master node.
* @ch: common header
* @highest_inum: highest inode number in the committed index
* @cmt_no: commit number
* @flags: various flags (%UBIFS_MST_DIRTY, etc)
* @log_lnum: start of the log
* @root_lnum: LEB number of the root indexing node
* @root_offs: offset within @root_lnum
* @root_len: root indexing node length
* @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was
* not reserved and should be reserved on mount)
* @ihead_lnum: LEB number of index head
* @ihead_offs: offset of index head
* @index_size: size of index on flash
* @total_free: total free space in bytes
* @total_dirty: total dirty space in bytes
* @total_used: total used space in bytes (includes only data LEBs)
* @total_dead: total dead space in bytes (includes only data LEBs)
* @total_dark: total dark space in bytes (includes only data LEBs)
* @lpt_lnum: LEB number of LPT root nnode
* @lpt_offs: offset of LPT root nnode
* @nhead_lnum: LEB number of LPT head
* @nhead_offs: offset of LPT head
* @ltab_lnum: LEB number of LPT's own lprops table
* @ltab_offs: offset of LPT's own lprops table
* @lsave_lnum: LEB number of LPT's save table (big model only)
* @lsave_offs: offset of LPT's save table (big model only)
* @lscan_lnum: LEB number of last LPT scan
* @empty_lebs: number of empty logical eraseblocks
* @idx_lebs: number of indexing logical eraseblocks
* @leb_cnt: count of LEBs used by file-system
* @padding: reserved for future, zeroes
*
* The named fields take 168 bytes and @padding fills the node up to 512
* bytes, so a new field has to be carved out of @padding to keep the node
* size unchanged.
*/
struct ubifs_mst_node {
struct ubifs_ch ch;
__le64 highest_inum;
__le64 cmt_no;
__le32 flags;
__le32 log_lnum;
__le32 root_lnum;
__le32 root_offs;
__le32 root_len;
__le32 gc_lnum;
__le32 ihead_lnum;
__le32 ihead_offs;
__le64 index_size;
__le64 total_free;
__le64 total_dirty;
__le64 total_used;
__le64 total_dead;
__le64 total_dark;
__le32 lpt_lnum;
__le32 lpt_offs;
__le32 nhead_lnum;
__le32 nhead_offs;
__le32 ltab_lnum;
__le32 ltab_offs;
__le32 lsave_lnum;
__le32 lsave_offs;
__le32 lscan_lnum;
__le32 empty_lebs;
__le32 idx_lebs;
__le32 leb_cnt;
__u8 padding[344];
} __packed;
/**
* struct ubifs_ref_node - logical eraseblock reference node.
* @ch: common header
* @lnum: the referred logical eraseblock number
* @offs: start offset in the referred LEB
* @jhead: journal head number
* @padding: reserved for future, zeroes
*
* The fields add up to 64 bytes (24-byte header, 12 bytes of data and 28
* bytes of padding).
*/
struct ubifs_ref_node {
struct ubifs_ch ch;
__le32 lnum;
__le32 offs;
__le32 jhead;
__u8 padding[28];
} __packed;
/**
* struct ubifs_branch - key/reference/length branch
* @lnum: LEB number of the target node
* @offs: offset within @lnum
* @len: target node length
* @key: key
*
* The fixed part is 12 bytes; @key is a flexible array member, so the key
* length is not part of 'sizeof(struct ubifs_branch)' (%UBIFS_BRANCH_SZ) and
* has to be added by the user of the structure.
*/
struct ubifs_branch {
__le32 lnum;
__le32 offs;
__le32 len;
__u8 key[];
} __packed;
/**
* struct ubifs_idx_node - indexing node.
* @ch: common header
* @child_cnt: number of child index nodes
* @level: tree level
* @branches: LEB number / offset / length / key branches
*
* @branches is a raw byte array rather than an array of 'struct ubifs_branch'
* because each branch is followed by its key, whose length is not fixed by
* the branch structure.
*/
struct ubifs_idx_node {
struct ubifs_ch ch;
__le16 child_cnt;
__le16 level;
__u8 branches[];
} __packed;
/**
* struct ubifs_cs_node - commit start node.
* @ch: common header
* @cmt_no: commit number
*
* The node is 32 bytes: the common header followed by the 64-bit commit
* number.
*/
struct ubifs_cs_node {
struct ubifs_ch ch;
__le64 cmt_no;
} __packed;
/**
* struct ubifs_orph_node - orphan node.
* @ch: common header
* @cmt_no: commit number (also top bit is set on the last node of the commit)
* @inos: inode numbers of orphans
*
* @inos is a flexible array member; the number of entries is not stored in
* this structure (presumably it is derived from the node length in @ch -
* confirm against the orphan code).
*/
struct ubifs_orph_node {
struct ubifs_ch ch;
__le64 cmt_no;
__le64 inos[];
} __packed;
#endif /* __UBIFS_MEDIA_H__ */

1795
fs/ubifs/ubifs.h Normal file

File diff suppressed because it is too large Load diff

570
fs/ubifs/xattr.c Normal file
View file

@ -0,0 +1,570 @@
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/*
* This file implements UBIFS extended attributes support.
*
* Extended attributes are implemented as regular inodes with attached data,
* which limits extended attribute size to UBIFS block size (4KiB). Names of
* extended attributes are described by extended attribute entries (xentries),
* which are almost identical to directory entries, but have different key type.
*
* In other words, the situation with extended attributes is very similar to
* directories. Indeed, any inode (but of course not xattr inodes) may have a
* number of associated xentries, just like directory inodes have associated
* directory entries. Extended attribute entries store the name of the extended
* attribute, the host inode number, and the extended attribute inode number.
* Similarly, direntries store the name, the parent and the target inode
* numbers. Thus, most of the common UBIFS mechanisms may be re-used for
* extended attributes.
*
* The number of extended attributes is not limited, but there is Linux
* limitation on the maximum possible size of the list of all extended
* attributes associated with an inode (%XATTR_LIST_MAX), so UBIFS makes sure
* the sum of all extended attribute names of the inode does not exceed that
* limit.
*
* Extended attributes are synchronous, which means they are written to the
* flash media synchronously and there is no write-back for extended attribute
* inodes. The extended attribute values are not stored in compressed form on
* the media.
*
* Since extended attributes are represented by regular inodes, they are cached
* in the VFS inode cache. The xentries are cached in the LNC cache (see
* tnc.c).
*
* ACL support is not implemented.
*/
#include "ubifs.h"
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
/*
* Limit the number of extended attributes per inode so that the total size
* (@xattr_size) is guaranteed to fit in an 'unsigned int'.
*/
#define MAX_XATTRS_PER_INODE 65535
/*
 * Name-spaces of extended attributes, as returned by 'check_namespace()'.
 *
 * USER_XATTR: user extended attribute ("user.*")
 * TRUSTED_XATTR: trusted extended attribute ("trusted.*")
 * SECURITY_XATTR: security extended attribute ("security.*")
 */
enum {
	USER_XATTR	= 0,
	TRUSTED_XATTR	= 1,
	SECURITY_XATTR	= 2,
};
/*
* Operation tables with no operations set (static objects without an
* initializer are zero-filled). They are installed on extended attribute
* inodes by 'create_xattr()' so that those inodes expose no inode or file
* operations.
*/
static const struct inode_operations empty_iops;
static const struct file_operations empty_fops;
/**
* create_xattr - create an extended attribute.
* @c: UBIFS file-system description object
* @host: host inode
* @nm: extended attribute name
* @value: extended attribute value
* @size: size of extended attribute value
*
* This is a helper function which creates an extended attribute of name @nm
* and value @value for inode @host. The host inode is also updated on flash
* because the ctime and extended attribute accounting data changes. This
* function returns zero in case of success and a negative error code in case
* of failure.
*/
static int create_xattr(struct ubifs_info *c, struct inode *host,
const struct qstr *nm, const void *value, int size)
{
int err;
struct inode *inode;
struct ubifs_inode *ui, *host_ui = ubifs_inode(host);
struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
.new_ino_d = ALIGN(size, 8), .dirtied_ino = 1,
.dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE)
return -ENOSPC;
/*
* Linux limits the maximum size of the extended attribute names list
* to %XATTR_LIST_MAX. This means we should not allow creating more
* extended attributes if the name list becomes larger. This limitation
* is artificial for UBIFS, though.
*/
if (host_ui->xattr_names + host_ui->xattr_cnt +
nm->len + 1 > XATTR_LIST_MAX)
return -ENOSPC;
err = ubifs_budget_space(c, &req);
if (err)
return err;
inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_budg;
}
/* Re-define all operations to be "nothing" */
inode->i_mapping->a_ops = &empty_aops;
inode->i_op = &empty_iops;
inode->i_fop = &empty_fops;
inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
ui = ubifs_inode(inode);
ui->xattr = 1;
ui->flags |= UBIFS_XATTR_FL;
ui->data = kmemdup(value, size, GFP_NOFS);
if (!ui->data) {
err = -ENOMEM;
goto out_free;
}
inode->i_size = ui->ui_size = size;
ui->data_len = size;
mutex_lock(&host_ui->ui_mutex);
host->i_ctime = ubifs_current_time(host);
host_ui->xattr_cnt += 1;
host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
host_ui->xattr_size += CALC_XATTR_BYTES(size);
host_ui->xattr_names += nm->len;
err = ubifs_jnl_update(c, host, nm, inode, 0, 1);
if (err)
goto out_cancel;
mutex_unlock(&host_ui->ui_mutex);
ubifs_release_budget(c, &req);
insert_inode_hash(inode);
iput(inode);
return 0;
out_cancel:
host_ui->xattr_cnt -= 1;
host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
host_ui->xattr_size -= CALC_XATTR_BYTES(size);
mutex_unlock(&host_ui->ui_mutex);
out_free:
make_bad_inode(inode);
iput(inode);
out_budg:
ubifs_release_budget(c, &req);
return err;
}
/**
 * change_xattr - change an extended attribute.
 * @c: UBIFS file-system description object
 * @host: host inode
 * @inode: extended attribute inode
 * @value: extended attribute value
 * @size: size of extended attribute value
 *
 * This helper function changes the value of extended attribute @inode with new
 * data from @value. Returns zero in case of success and a negative error code
 * in case of failure.
 *
 * The new value is duplicated before the old one is released, so if the
 * allocation fails (%-ENOMEM) the extended attribute inode is left untouched.
 * If the journal update fails, the host accounting is rolled back and @inode
 * is marked bad.
 */
static int change_xattr(struct ubifs_info *c, struct inode *host,
			struct inode *inode, const void *value, int size)
{
	int err, old_size;
	void *buf;
	struct ubifs_inode *host_ui = ubifs_inode(host);
	struct ubifs_inode *ui = ubifs_inode(inode);
	struct ubifs_budget_req req = { .dirtied_ino = 2,
		.dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) };

	ubifs_assert(ui->data_len == inode->i_size);
	err = ubifs_budget_space(c, &req);
	if (err)
		return err;

	/* Allocate first: on failure the old value must stay intact */
	buf = kmemdup(value, size, GFP_NOFS);
	if (!buf) {
		err = -ENOMEM;
		goto out_free;
	}

	kfree(ui->data);
	ui->data = buf;
	/* Remember the old length, it is needed for the host accounting */
	old_size = ui->data_len;
	inode->i_size = ui->ui_size = size;
	ui->data_len = size;

	mutex_lock(&host_ui->ui_mutex);
	host->i_ctime = ubifs_current_time(host);
	host_ui->xattr_size -= CALC_XATTR_BYTES(old_size);
	host_ui->xattr_size += CALC_XATTR_BYTES(size);

	/*
	 * It is important to write the host inode after the xattr inode
	 * because if the host inode gets synchronized (via 'fsync()'), then
	 * the extended attribute inode gets synchronized, because it goes
	 * before the host inode in the write-buffer.
	 */
	err = ubifs_jnl_change_xattr(c, inode, host);
	if (err)
		goto out_cancel;
	mutex_unlock(&host_ui->ui_mutex);

	ubifs_release_budget(c, &req);
	return 0;

out_cancel:
	host_ui->xattr_size -= CALC_XATTR_BYTES(size);
	host_ui->xattr_size += CALC_XATTR_BYTES(old_size);
	mutex_unlock(&host_ui->ui_mutex);
	make_bad_inode(inode);
out_free:
	ubifs_release_budget(c, &req);
	return err;
}
/**
 * check_namespace - check extended attribute name-space.
 * @nm: extended attribute name
 *
 * This function classifies @nm by its prefix. It returns the name-space
 * index (%TRUSTED_XATTR, %USER_XATTR or %SECURITY_XATTR) if the name belongs
 * to a supported name-space, %-ENAMETOOLONG if the name is longer than
 * %UBIFS_MAX_NLEN, %-EINVAL if the name consists of the prefix only, and
 * %-EOPNOTSUPP if the prefix is not one of the supported ones.
 */
static int check_namespace(const struct qstr *nm)
{
	if (nm->len > UBIFS_MAX_NLEN)
		return -ENAMETOOLONG;

	if (strncmp(nm->name, XATTR_TRUSTED_PREFIX,
		    XATTR_TRUSTED_PREFIX_LEN) == 0) {
		/* Something has to follow the prefix */
		if (nm->name[XATTR_TRUSTED_PREFIX_LEN] == '\0')
			return -EINVAL;
		return TRUSTED_XATTR;
	}

	if (strncmp(nm->name, XATTR_USER_PREFIX,
		    XATTR_USER_PREFIX_LEN) == 0) {
		if (nm->name[XATTR_USER_PREFIX_LEN] == '\0')
			return -EINVAL;
		return USER_XATTR;
	}

	if (strncmp(nm->name, XATTR_SECURITY_PREFIX,
		    XATTR_SECURITY_PREFIX_LEN) == 0) {
		if (nm->name[XATTR_SECURITY_PREFIX_LEN] == '\0')
			return -EINVAL;
		return SECURITY_XATTR;
	}

	return -EOPNOTSUPP;
}
/**
 * iget_xattr - get the inode of an extended attribute.
 * @c: UBIFS file-system description object
 * @inum: inode number of the extended attribute
 *
 * This helper looks up inode @inum and makes sure it is marked as an extended
 * attribute inode. Returns the inode (the caller has to 'iput()' it) in case
 * of success, the error pointer from 'ubifs_iget()' if the inode cannot be
 * read, and %-EINVAL as an error pointer if the inode is not an extended
 * attribute inode.
 */
static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum)
{
	struct inode *xino = ubifs_iget(c->vfs_sb, inum);

	if (IS_ERR(xino)) {
		ubifs_err("dead extended attribute entry, error %d",
			  (int)PTR_ERR(xino));
		return xino;
	}

	if (!ubifs_inode(xino)->xattr) {
		ubifs_err("corrupt extended attribute entry");
		iput(xino);
		return ERR_PTR(-EINVAL);
	}

	return xino;
}
/**
 * ubifs_setxattr - create or change an extended attribute.
 * @dentry: dentry of the host inode
 * @name: extended attribute name
 * @value: extended attribute value
 * @size: size of the value in bytes
 * @flags: %XATTR_CREATE and/or %XATTR_REPLACE, or zero
 *
 * If the attribute does not exist it is created, unless %XATTR_REPLACE is
 * set, in which case %-ENODATA is returned. If it exists its value is
 * changed, unless %XATTR_CREATE is set, in which case %-EEXIST is returned.
 * Values larger than %UBIFS_MAX_INO_DATA are rejected with %-ERANGE. Returns
 * zero in case of success and a negative error code in case of failure. The
 * host inode's @i_mutex is expected to be held by the caller.
 */
int ubifs_setxattr(struct dentry *dentry, const char *name,
		   const void *value, size_t size, int flags)
{
	struct inode *host = dentry->d_inode;
	struct inode *xino;
	struct ubifs_info *c = host->i_sb->s_fs_info;
	struct qstr nm = QSTR_INIT(name, strlen(name));
	struct ubifs_dent_node *xent;
	union ubifs_key key;
	int ret, ns;

	dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name,
		host->i_ino, dentry, size);
	ubifs_assert(mutex_is_locked(&host->i_mutex));

	if (size > UBIFS_MAX_INO_DATA)
		return -ERANGE;

	ns = check_namespace(&nm);
	if (ns < 0)
		return ns;

	xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
	if (!xent)
		return -ENOMEM;

	/*
	 * The extended attribute entries are stored in LNC, so multiple
	 * look-ups do not involve reading the flash.
	 */
	xent_key_init(c, &key, host->i_ino, &nm);
	ret = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
	if (ret == -ENOENT) {
		/* No such attribute yet - create it unless told not to */
		if (flags & XATTR_REPLACE)
			ret = -ENODATA;
		else
			ret = create_xattr(c, host, &nm, value, size);
		goto out;
	}
	if (ret)
		goto out;

	/* The attribute exists - replace it unless told not to */
	if (flags & XATTR_CREATE) {
		ret = -EEXIST;
		goto out;
	}

	xino = iget_xattr(c, le64_to_cpu(xent->inum));
	if (IS_ERR(xino)) {
		ret = PTR_ERR(xino);
		goto out;
	}

	ret = change_xattr(c, host, xino, value, size);
	iput(xino);

out:
	kfree(xent);
	return ret;
}
/**
 * ubifs_getxattr - read the value of an extended attribute.
 * @dentry: dentry of the host inode
 * @name: extended attribute name
 * @buf: buffer to copy the value to, or %NULL to only query the length
 * @size: size of @buf in bytes
 *
 * Returns the length of the attribute value in case of success. If @buf is
 * not %NULL the value is also copied to it. Returns %-ENODATA if the
 * attribute does not exist, %-ERANGE if @buf is too small for the value, and
 * other negative error codes in case of other failures.
 */
ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
		       size_t size)
{
	struct inode *host = dentry->d_inode;
	struct inode *xino;
	struct ubifs_info *c = host->i_sb->s_fs_info;
	struct qstr nm = QSTR_INIT(name, strlen(name));
	struct ubifs_inode *xui;
	struct ubifs_dent_node *xent;
	union ubifs_key key;
	int ret;

	dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
		host->i_ino, dentry, size);

	ret = check_namespace(&nm);
	if (ret < 0)
		return ret;

	xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
	if (!xent)
		return -ENOMEM;

	xent_key_init(c, &key, host->i_ino, &nm);
	ret = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
	if (ret == -ENOENT)
		ret = -ENODATA;
	if (ret)
		goto out_free;

	xino = iget_xattr(c, le64_to_cpu(xent->inum));
	if (IS_ERR(xino)) {
		ret = PTR_ERR(xino);
		goto out_free;
	}

	xui = ubifs_inode(xino);
	ubifs_assert(xino->i_size == xui->data_len);
	ubifs_assert(ubifs_inode(host)->xattr_size > xui->data_len);

	/* With a %NULL @buf only the length is reported */
	ret = xui->data_len;
	if (buf) {
		if (xui->data_len > size) {
			ubifs_err("buffer size %zd, xattr len %d",
				  size, xui->data_len);
			ret = -ERANGE;
		} else {
			memcpy(buf, xui->data, xui->data_len);
		}
	}

	iput(xino);
out_free:
	kfree(xent);
	return ret;
}
/**
 * ubifs_listxattr - list the extended attribute names of an inode.
 * @dentry: dentry of the host inode
 * @buffer: buffer to put the zero-terminated names to, or %NULL to only
 *          query the required buffer size
 * @size: size of @buffer in bytes
 *
 * If @buffer is %NULL, returns the buffer size needed for the names of all
 * extended attributes of the inode. Otherwise copies the names (each with its
 * terminating zero) to @buffer and returns the number of bytes written; names
 * from the "trusted" name-space are listed only for callers with
 * %CAP_SYS_ADMIN. Returns %-ERANGE if @buffer is too small and other negative
 * error codes in case of other failures.
 */
ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
	union ubifs_key key;
	struct inode *host = dentry->d_inode;
	struct ubifs_info *c = host->i_sb->s_fs_info;
	struct ubifs_inode *host_ui = ubifs_inode(host);
	struct ubifs_dent_node *xent, *pxent = NULL;
	int err, len, written = 0;
	struct qstr nm = { .name = NULL };

	dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino,
		dentry, size);

	/* One extra byte per name for the terminating zero */
	len = host_ui->xattr_names + host_ui->xattr_cnt;
	if (!buffer)
		/*
		 * We should return the minimum buffer size which will fit a
		 * null-terminated list of all the extended attribute names.
		 */
		return len;

	if (len > size)
		return -ERANGE;

	lowest_xent_key(c, &key, host->i_ino);
	while (1) {
		int type;

		xent = ubifs_tnc_next_ent(c, &key, &nm);
		if (IS_ERR(xent)) {
			err = PTR_ERR(xent);
			break;
		}

		nm.name = xent->name;
		nm.len = le16_to_cpu(xent->nlen);

		type = check_namespace(&nm);
		if (unlikely(type < 0)) {
			/*
			 * @xent has not been handed over to @pxent yet, so it
			 * has to be freed here or it would leak.
			 */
			kfree(xent);
			err = type;
			break;
		}

		/* Show trusted namespace only for "power" users */
		if (type != TRUSTED_XATTR || capable(CAP_SYS_ADMIN)) {
			memcpy(buffer + written, nm.name, nm.len + 1);
			written += nm.len + 1;
		}

		/* Keep @xent alive: @nm still points into it for the next look-up */
		kfree(pxent);
		pxent = xent;
		key_read(c, &xent->key, &key);
	}

	kfree(pxent);
	/* %-ENOENT just means there are no more entries */
	if (err != -ENOENT) {
		ubifs_err("cannot find next direntry, error %d", err);
		return err;
	}

	ubifs_assert(written <= size);
	return written;
}
/**
 * remove_xattr - remove an extended attribute.
 * @c: UBIFS file-system description object
 * @host: host inode
 * @inode: extended attribute inode
 * @nm: extended attribute name
 *
 * This helper updates the extended attribute accounting of @host and writes
 * the removal to the journal. Returns zero in case of success and a negative
 * error code in case of failure. On a journal failure all the host accounting
 * fields touched here (@xattr_cnt, @xattr_size and @xattr_names) are rolled
 * back and @inode is marked bad.
 */
static int remove_xattr(struct ubifs_info *c, struct inode *host,
			struct inode *inode, const struct qstr *nm)
{
	int err;
	struct ubifs_inode *host_ui = ubifs_inode(host);
	struct ubifs_inode *ui = ubifs_inode(inode);
	struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1,
				.dirtied_ino_d = ALIGN(host_ui->data_len, 8) };

	ubifs_assert(ui->data_len == inode->i_size);

	err = ubifs_budget_space(c, &req);
	if (err)
		return err;

	mutex_lock(&host_ui->ui_mutex);
	host->i_ctime = ubifs_current_time(host);
	host_ui->xattr_cnt -= 1;
	host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
	host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
	host_ui->xattr_names -= nm->len;

	err = ubifs_jnl_delete_xattr(c, host, inode, nm);
	if (err)
		goto out_cancel;
	mutex_unlock(&host_ui->ui_mutex);

	ubifs_release_budget(c, &req);
	return 0;

out_cancel:
	/* Undo every accounting change made above, including @xattr_names */
	host_ui->xattr_cnt += 1;
	host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
	host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
	host_ui->xattr_names += nm->len;
	mutex_unlock(&host_ui->ui_mutex);
	ubifs_release_budget(c, &req);
	make_bad_inode(inode);
	return err;
}
/**
 * ubifs_removexattr - remove an extended attribute.
 * @dentry: dentry of the host inode
 * @name: extended attribute name
 *
 * Returns zero in case of success, %-ENODATA if the attribute does not exist
 * and other negative error codes in case of other failures. The host inode's
 * @i_mutex is expected to be held by the caller.
 */
int ubifs_removexattr(struct dentry *dentry, const char *name)
{
	struct inode *host = dentry->d_inode;
	struct inode *xino;
	struct ubifs_info *c = host->i_sb->s_fs_info;
	struct qstr nm = QSTR_INIT(name, strlen(name));
	struct ubifs_dent_node *xent;
	union ubifs_key key;
	int ret;

	dbg_gen("xattr '%s', ino %lu ('%pd')", name,
		host->i_ino, dentry);
	ubifs_assert(mutex_is_locked(&host->i_mutex));

	ret = check_namespace(&nm);
	if (ret < 0)
		return ret;

	xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
	if (!xent)
		return -ENOMEM;

	xent_key_init(c, &key, host->i_ino, &nm);
	ret = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
	if (ret == -ENOENT)
		ret = -ENODATA;
	if (ret)
		goto out;

	xino = iget_xattr(c, le64_to_cpu(xent->inum));
	if (IS_ERR(xino)) {
		ret = PTR_ERR(xino);
		goto out;
	}

	ubifs_assert(xino->i_nlink == 1);
	/* Drop the link count first and restore it if the removal fails */
	clear_nlink(xino);
	ret = remove_xattr(c, host, xino, &nm);
	if (ret)
		set_nlink(xino, 1);

	/* If @i_nlink is 0, 'iput()' will delete the inode */
	iput(xino);

out:
	kfree(xent);
	return ret;
}