diff options
75 files changed, 26583 insertions, 6800 deletions
@@ -3338,6 +3338,9 @@ FIT uImage format: Adds the MTD partitioning infrastructure from the Linux kernel. Needed for UBI support. + CONFIG_MTD_NAND_VERIFY_WRITE + verify if the written data is correct reread. + - UBI support CONFIG_CMD_UBI @@ -3351,6 +3354,64 @@ FIT uImage format: Make the verbose messages from UBI stop printing. This leaves warnings and errors enabled. + + CONFIG_MTD_UBI_WL_THRESHOLD + This parameter defines the maximum difference between the highest + erase counter value and the lowest erase counter value of eraseblocks + of UBI devices. When this threshold is exceeded, UBI starts performing + wear leveling by means of moving data from eraseblock with low erase + counter to eraseblocks with high erase counter. + + The default value should be OK for SLC NAND flashes, NOR flashes and + other flashes which have eraseblock life-cycle 100000 or more. + However, in case of MLC NAND flashes which typically have eraseblock + life-cycle less than 10000, the threshold should be lessened (e.g., + to 128 or 256, although it does not have to be power of 2). + + default: 4096 + + CONFIG_MTD_UBI_BEB_LIMIT + This option specifies the maximum bad physical eraseblocks UBI + expects on the MTD device (per 1024 eraseblocks). If the + underlying flash does not admit of bad eraseblocks (e.g. NOR + flash), this value is ignored. + + NAND datasheets often specify the minimum and maximum NVM + (Number of Valid Blocks) for the flashes' endurance lifetime. + The maximum expected bad eraseblocks per 1024 eraseblocks + then can be calculated as "1024 * (1 - MinNVB / MaxNVB)", + which gives 20 for most NANDs (MaxNVB is basically the total + count of eraseblocks on the chip). + + To put it differently, if this value is 20, UBI will try to + reserve about 1.9% of physical eraseblocks for bad blocks + handling. And that will be 1.9% of eraseblocks on the entire + NAND chip, not just the MTD partition UBI attaches. This means + that if you have, say, a NAND flash chip admits maximum 40 bad + eraseblocks, and it is split on two MTD partitions of the same + size, UBI will reserve 40 eraseblocks when attaching a + partition. + + default: 20 + + CONFIG_MTD_UBI_FASTMAP + Fastmap is a mechanism which allows attaching an UBI device + in nearly constant time. Instead of scanning the whole MTD device it + only has to locate a checkpoint (called fastmap) on the device. + The on-flash fastmap contains all information needed to attach + the device. Using fastmap makes only sense on large devices where + attaching by scanning takes long. UBI will not automatically install + a fastmap on old images, but you can set the UBI parameter + CONFIG_MTD_UBI_FASTMAP_AUTOCONVERT to 1 if you want so. Please note + that fastmap-enabled images are still usable with UBI implementations + without fastmap support. On typical flash devices the whole fastmap + fits into one PEB. UBI will reserve PEBs to hold two fastmaps. + + CONFIG_MTD_UBI_FASTMAP_AUTOCONVERT + Set this parameter to enable fastmap automatically on images + without a fastmap. + default: 0 + - UBIFS support CONFIG_CMD_UBIFS diff --git a/board/prodrive/alpr/nand.c b/board/prodrive/alpr/nand.c index 50e8d82..5427de5 100644 --- a/board/prodrive/alpr/nand.c +++ b/board/prodrive/alpr/nand.c @@ -93,6 +93,7 @@ static void alpr_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len) } } +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) static int alpr_nand_verify_buf(struct mtd_info *mtd, const u_char *buf, int len) { int i; @@ -103,6 +104,7 @@ static int alpr_nand_verify_buf(struct mtd_info *mtd, const u_char *buf, int len return 0; } +#endif static int alpr_nand_dev_ready(struct mtd_info *mtd) { @@ -128,7 +130,9 @@ int board_nand_init(struct nand_chip *nand) nand->read_byte = alpr_nand_read_byte; nand->write_buf = alpr_nand_write_buf; nand->read_buf = alpr_nand_read_buf; +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) nand->verify_buf = alpr_nand_verify_buf; +#endif nand->dev_ready = alpr_nand_dev_ready; return 0; diff --git a/board/socrates/nand.c b/board/socrates/nand.c index 3802c7e..7394478 100644 --- a/board/socrates/nand.c +++ b/board/socrates/nand.c @@ -18,7 +18,9 @@ static void sc_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len); static u_char sc_nand_read_byte(struct mtd_info *mtd); static u16 sc_nand_read_word(struct mtd_info *mtd); static void sc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len); +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) static int sc_nand_verify_buf(struct mtd_info *mtd, const u_char *buf, int len); +#endif static int sc_nand_device_ready(struct mtd_info *mtdinfo); #define FPGA_NAND_CMD_MASK (0x7 << 28) @@ -100,6 +102,7 @@ static void sc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len) } } +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) /** * sc_nand_verify_buf - Verify chip data against buffer * @mtd: MTD device structure @@ -116,6 +119,7 @@ static int sc_nand_verify_buf(struct mtd_info *mtd, const u_char *buf, int len) } return 0; } +#endif /** * sc_nand_device_ready - Check the NAND device is ready for next command. @@ -174,7 +178,9 @@ int board_nand_init(struct nand_chip *nand) nand->read_word = sc_nand_read_word; nand->write_buf = sc_nand_write_buf; nand->read_buf = sc_nand_read_buf; +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) nand->verify_buf = sc_nand_verify_buf; +#endif return 0; } diff --git a/board/tqc/tqm8272/nand.c b/board/tqc/tqm8272/nand.c index 4925b8d..7fb2dfa 100644 --- a/board/tqc/tqm8272/nand.c +++ b/board/tqc/tqm8272/nand.c @@ -188,6 +188,7 @@ static void tqm8272_write_buf(struct mtd_info *mtdinfo, const uint8_t *buf, int *base = buf[i]; } +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) static int tqm8272_verify_buf(struct mtd_info *mtdinfo, const uint8_t *buf, int len) { struct nand_chip *this = mtdinfo->priv; @@ -199,6 +200,7 @@ static int tqm8272_verify_buf(struct mtd_info *mtdinfo, const uint8_t *buf, int return -1; return 0; } +#endif #endif /* #ifndef CONFIG_NAND_SPL */ void board_nand_select_device(struct nand_chip *nand, int chip) @@ -247,8 +249,10 @@ int board_nand_init(struct nand_chip *nand) #ifndef CONFIG_NAND_SPL nand->write_buf = tqm8272_write_buf; nand->read_buf = tqm8272_read_buf; +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) nand->verify_buf = tqm8272_verify_buf; #endif +#endif /* * Select required NAND chip diff --git a/common/cmd_ubi.c b/common/cmd_ubi.c index 3c37c93..6c85703 100644 --- a/common/cmd_ubi.c +++ b/common/cmd_ubi.c @@ -19,6 +19,7 @@ #include <onenand_uboot.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> +#include <linux/err.h> #include <ubi_uboot.h> #include <asm/errno.h> #include <jffs2/load_kernel.h> @@ -50,33 +51,6 @@ int ubifs_is_mounted(void); void cmd_ubifs_umount(void); #endif -static void ubi_dump_vol_info(const struct ubi_volume *vol) -{ - ubi_msg("volume information dump:"); - ubi_msg("vol_id %d", vol->vol_id); - ubi_msg("reserved_pebs %d", vol->reserved_pebs); - ubi_msg("alignment %d", vol->alignment); - ubi_msg("data_pad %d", vol->data_pad); - ubi_msg("vol_type %d", vol->vol_type); - ubi_msg("name_len %d", vol->name_len); - ubi_msg("usable_leb_size %d", vol->usable_leb_size); - ubi_msg("used_ebs %d", vol->used_ebs); - ubi_msg("used_bytes %lld", vol->used_bytes); - ubi_msg("last_eb_bytes %d", vol->last_eb_bytes); - ubi_msg("corrupted %d", vol->corrupted); - ubi_msg("upd_marker %d", vol->upd_marker); - - if (vol->name_len <= UBI_VOL_NAME_MAX && - strnlen(vol->name, vol->name_len + 1) == vol->name_len) { - ubi_msg("name %s", vol->name); - } else { - ubi_msg("the 1st 5 characters of the name: %c%c%c%c%c", - vol->name[0], vol->name[1], vol->name[2], - vol->name[3], vol->name[4]); - } - printf("\n"); -} - static void display_volume_info(struct ubi_device *ubi) { int i; diff --git a/common/cmd_ubifs.c b/common/cmd_ubifs.c index 19c8a43..8e9a4e5 100644 --- a/common/cmd_ubifs.c +++ b/common/cmd_ubifs.c @@ -38,7 +38,7 @@ static int do_ubifs_mount(cmd_tbl_t *cmdtp, int flag, int argc, ubifs_initialized = 1; } - ret = ubifs_mount(vol_name); + ret = uboot_ubifs_mount(vol_name); if (ret) return -1; diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 31e4289..39daeab 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -1,16 +1,32 @@ /* * MTD device concatenation layer * - * (C) 2002 Robert Kaiser <rkaiser@sysgo.de> + * Copyright © 2002 Robert Kaiser <rkaiser@sysgo.de> + * Copyright © 2002-2010 David Woodhouse <dwmw2@infradead.org> * * NAND support by Christian Gan <cgan@iders.ca> * - * This code is GPL + * SPDX-License-Identifier: GPL-2.0+ + * */ -#include <linux/mtd/mtd.h> +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/backing-dev.h> +#include <asm/div64.h> +#else +#include <div64.h> #include <linux/compat.h> +#endif + +#include <linux/mtd/mtd.h> #include <linux/mtd/concat.h> + #include <ubi_uboot.h> /* @@ -51,7 +67,9 @@ concat_read(struct mtd_info *mtd, loff_t from, size_t len, int ret = 0, err; int i; +#ifdef __UBOOT__ *retlen = 0; +#endif for (i = 0; i < concat->num_subdev; i++) { struct mtd_info *subdev = concat->subdev[i]; @@ -105,7 +123,9 @@ concat_write(struct mtd_info *mtd, loff_t to, size_t len, int err = -EINVAL; int i; +#ifdef __UBOOT__ *retlen = 0; +#endif for (i = 0; i < concat->num_subdev; i++) { struct mtd_info *subdev = concat->subdev[i]; @@ -137,6 +157,83 @@ concat_write(struct mtd_info *mtd, loff_t to, size_t len, return err; } +#ifndef __UBOOT__ +static int +concat_writev(struct mtd_info *mtd, const struct kvec *vecs, + unsigned long count, loff_t to, size_t * retlen) +{ + struct mtd_concat *concat = CONCAT(mtd); + struct kvec *vecs_copy; + unsigned long entry_low, entry_high; + size_t total_len = 0; + int i; + int err = -EINVAL; + + /* Calculate total length of data */ + for (i = 0; i < count; i++) + total_len += vecs[i].iov_len; + + /* Check alignment */ + if (mtd->writesize > 1) { + uint64_t __to = to; + if (do_div(__to, mtd->writesize) || (total_len % mtd->writesize)) + return -EINVAL; + } + + /* make a copy of vecs */ + vecs_copy = kmemdup(vecs, sizeof(struct kvec) * count, GFP_KERNEL); + if (!vecs_copy) + return -ENOMEM; + + entry_low = 0; + for (i = 0; i < concat->num_subdev; i++) { + struct mtd_info *subdev = concat->subdev[i]; + size_t size, wsize, retsize, old_iov_len; + + if (to >= subdev->size) { + to -= subdev->size; + continue; + } + + size = min_t(uint64_t, total_len, subdev->size - to); + wsize = size; /* store for future use */ + + entry_high = entry_low; + while (entry_high < count) { + if (size <= vecs_copy[entry_high].iov_len) + break; + size -= vecs_copy[entry_high++].iov_len; + } + + old_iov_len = vecs_copy[entry_high].iov_len; + vecs_copy[entry_high].iov_len = size; + + err = mtd_writev(subdev, &vecs_copy[entry_low], + entry_high - entry_low + 1, to, &retsize); + + vecs_copy[entry_high].iov_len = old_iov_len - size; + vecs_copy[entry_high].iov_base += size; + + entry_low = entry_high; + + if (err) + break; + + *retlen += retsize; + total_len -= wsize; + + if (total_len == 0) + break; + + err = -EINVAL; + to = 0; + } + + kfree(vecs_copy); + return err; +} +#endif + static int concat_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { @@ -204,7 +301,7 @@ concat_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; - ops->retlen = 0; + ops->retlen = ops->oobretlen = 0; for (i = 0; i < concat->num_subdev; i++) { struct mtd_info *subdev = concat->subdev[i]; @@ -219,7 +316,7 @@ concat_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) devops.len = subdev->size - to; err = mtd_write_oob(subdev, to, &devops); - ops->retlen += devops.retlen; + ops->retlen += devops.oobretlen; if (err) return err; @@ -243,6 +340,9 @@ concat_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) static void concat_erase_callback(struct erase_info *instr) { /* Nothing to do here in U-Boot */ +#ifndef __UBOOT__ + wake_up((wait_queue_head_t *) instr->priv); +#endif } static int concat_dev_erase(struct mtd_info *mtd, struct erase_info *erase) @@ -316,7 +416,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr) * to-be-erased area begins. Verify that the starting * offset is aligned to this region's erase size: */ - if (instr->addr & (erase_regions[i].erasesize - 1)) + if (i < 0 || instr->addr & (erase_regions[i].erasesize - 1)) return -EINVAL; /* @@ -329,8 +429,8 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr) /* * check if the ending offset is aligned to this region's erase size */ - if ((instr->addr + instr->len) & (erase_regions[i].erasesize - - 1)) + if (i < 0 || ((instr->addr + instr->len) & + (erase_regions[i].erasesize - 1))) return -EINVAL; } @@ -422,7 +522,6 @@ static int concat_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) size = len; err = mtd_lock(subdev, ofs, size); - if (err) break; @@ -457,7 +556,6 @@ static int concat_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) size = len; err = mtd_unlock(subdev, ofs, size); - if (err) break; @@ -483,6 +581,32 @@ static void concat_sync(struct mtd_info *mtd) } } +#ifndef __UBOOT__ +static int concat_suspend(struct mtd_info *mtd) +{ + struct mtd_concat *concat = CONCAT(mtd); + int i, rc = 0; + + for (i = 0; i < concat->num_subdev; i++) { + struct mtd_info *subdev = concat->subdev[i]; + if ((rc = mtd_suspend(subdev)) < 0) + return rc; + } + return rc; +} + +static void concat_resume(struct mtd_info *mtd) +{ + struct mtd_concat *concat = CONCAT(mtd); + int i; + + for (i = 0; i < concat->num_subdev; i++) { + struct mtd_info *subdev = concat->subdev[i]; + mtd_resume(subdev); + } +} +#endif + static int concat_block_isbad(struct mtd_info *mtd, loff_t ofs) { struct mtd_concat *concat = CONCAT(mtd); @@ -511,9 +635,6 @@ static int concat_block_markbad(struct mtd_info *mtd, loff_t ofs) struct mtd_concat *concat = CONCAT(mtd); int i, err = -EINVAL; - if (!mtd_can_have_bb(concat->subdev[0])) - return 0; - for (i = 0; i < concat->num_subdev; i++) { struct mtd_info *subdev = concat->subdev[i]; @@ -532,6 +653,32 @@ static int concat_block_markbad(struct mtd_info *mtd, loff_t ofs) } /* + * try to support NOMMU mmaps on concatenated devices + * - we don't support subdev spanning as we can't guarantee it'll work + */ +static unsigned long concat_get_unmapped_area(struct mtd_info *mtd, + unsigned long len, + unsigned long offset, + unsigned long flags) +{ + struct mtd_concat *concat = CONCAT(mtd); + int i; + + for (i = 0; i < concat->num_subdev; i++) { + struct mtd_info *subdev = concat->subdev[i]; + + if (offset >= subdev->size) { + offset -= subdev->size; + continue; + } + + return mtd_get_unmapped_area(subdev, len, offset, flags); + } + + return (unsigned long) -ENOSYS; +} + +/* * This function constructs a virtual MTD device by concatenating * num_devs MTD devices. A pointer to the new device object is * stored to *new_dev upon success. This function does _not_ @@ -539,17 +686,22 @@ static int concat_block_markbad(struct mtd_info *mtd, loff_t ofs) */ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to concatenate */ int num_devs, /* number of subdevices */ +#ifndef __UBOOT__ const char *name) +#else + char *name) +#endif { /* name for the new device */ int i; size_t size; struct mtd_concat *concat; uint32_t max_erasesize, curr_erasesize; int num_erase_region; + int max_writebufsize = 0; debug("Concatenating MTD devices:\n"); for (i = 0; i < num_devs; i++) - debug("(%d): \"%s\"\n", i, subdev[i]->name); + printk(KERN_NOTICE "(%d): \"%s\"\n", i, subdev[i]->name); debug("into device \"%s\"\n", name); /* allocate the device structure */ @@ -565,16 +717,26 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c /* * Set up the new "super" device's MTD object structure, check for - * incompatibilites between the subdevices. + * incompatibilities between the subdevices. */ concat->mtd.type = subdev[0]->type; concat->mtd.flags = subdev[0]->flags; concat->mtd.size = subdev[0]->size; concat->mtd.erasesize = subdev[0]->erasesize; concat->mtd.writesize = subdev[0]->writesize; + + for (i = 0; i < num_devs; i++) + if (max_writebufsize < subdev[i]->writebufsize) + max_writebufsize = subdev[i]->writebufsize; + concat->mtd.writebufsize = max_writebufsize; + concat->mtd.subpage_sft = subdev[0]->subpage_sft; concat->mtd.oobsize = subdev[0]->oobsize; concat->mtd.oobavail = subdev[0]->oobavail; +#ifndef __UBOOT__ + if (subdev[0]->_writev) + concat->mtd._writev = concat_writev; +#endif if (subdev[0]->_read_oob) concat->mtd._read_oob = concat_read_oob; if (subdev[0]->_write_oob) @@ -586,6 +748,10 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c concat->mtd.ecc_stats.badblocks = subdev[0]->ecc_stats.badblocks; +#ifndef __UBOOT__ + concat->mtd.backing_dev_info = subdev[0]->backing_dev_info; +#endif + concat->subdev[0] = subdev[0]; for (i = 1; i < num_devs; i++) { @@ -613,6 +779,16 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c subdev[i]->flags & MTD_WRITEABLE; } +#ifndef __UBOOT__ + /* only permit direct mapping if the BDIs are all the same + * - copy-mapping is still permitted + */ + if (concat->mtd.backing_dev_info != + subdev[i]->backing_dev_info) + concat->mtd.backing_dev_info = + &default_backing_dev_info; +#endif + concat->mtd.size += subdev[i]->size; concat->mtd.ecc_stats.badblocks += subdev[i]->ecc_stats.badblocks; @@ -641,6 +817,11 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c concat->mtd._sync = concat_sync; concat->mtd._lock = concat_lock; concat->mtd._unlock = concat_unlock; +#ifndef __UBOOT__ + concat->mtd._suspend = concat_suspend; + concat->mtd._resume = concat_resume; +#endif + concat->mtd._get_unmapped_area = concat_get_unmapped_area; /* * Combine the erase block size info of the subdevices: @@ -771,3 +952,22 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c return &concat->mtd; } + +/* + * This function destroys an MTD object obtained from concat_mtd_devs() + */ + +void mtd_concat_destroy(struct mtd_info *mtd) +{ + struct mtd_concat *concat = CONCAT(mtd); + if (concat->mtd.numeraseregions) + kfree(concat->mtd.eraseregions); + kfree(concat); +} + +EXPORT_SYMBOL(mtd_concat_create); +EXPORT_SYMBOL(mtd_concat_destroy); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Robert Kaiser <rkaiser@sysgo.de>"); +MODULE_DESCRIPTION("Generic support for concatenating of MTD devices"); diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 0a38fbe..796ac07 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -2,130 +2,767 @@ * Core registration and callback routines for MTD * drivers and users. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * Copyright © 1999-2010 David Woodhouse <dwmw2@infradead.org> + * Copyright © 2006 Red Hat UK Limited + * + * SPDX-License-Identifier: GPL-2.0+ + * */ -#include <linux/mtd/mtd.h> +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <linux/seq_file.h> +#include <linux/string.h> +#include <linux/timer.h> +#include <linux/major.h> +#include <linux/fs.h> +#include <linux/err.h> +#include <linux/ioctl.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/idr.h> +#include <linux/backing-dev.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#else #include <linux/compat.h> +#include <linux/err.h> #include <ubi_uboot.h> +#endif + +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> + +#include "mtdcore.h" + +#ifndef __UBOOT__ +/* + * backing device capabilities for non-mappable devices (such as NAND flash) + * - permits private mappings, copies are taken of the data + */ +static struct backing_dev_info mtd_bdi_unmappable = { + .capabilities = BDI_CAP_MAP_COPY, +}; + +/* + * backing device capabilities for R/O mappable devices (such as ROM) + * - permits private mappings, copies are taken of the data + * - permits non-writable shared mappings + */ +static struct backing_dev_info mtd_bdi_ro_mappable = { + .capabilities = (BDI_CAP_MAP_COPY | BDI_CAP_MAP_DIRECT | + BDI_CAP_EXEC_MAP | BDI_CAP_READ_MAP), +}; + +/* + * backing device capabilities for writable mappable devices (such as RAM) + * - permits private mappings, copies are taken of the data + * - permits non-writable shared mappings + */ +static struct backing_dev_info mtd_bdi_rw_mappable = { + .capabilities = (BDI_CAP_MAP_COPY | BDI_CAP_MAP_DIRECT | + BDI_CAP_EXEC_MAP | BDI_CAP_READ_MAP | + BDI_CAP_WRITE_MAP), +}; + +static int mtd_cls_suspend(struct device *dev, pm_message_t state); +static int mtd_cls_resume(struct device *dev); +static struct class mtd_class = { + .name = "mtd", + .owner = THIS_MODULE, + .suspend = mtd_cls_suspend, + .resume = mtd_cls_resume, +}; +#else struct mtd_info *mtd_table[MAX_MTD_DEVICES]; +#define MAX_IDR_ID 64 + +struct idr_layer { + int used; + void *ptr; +}; + +struct idr { + struct idr_layer id[MAX_IDR_ID]; +}; + +#define DEFINE_IDR(name) struct idr name; + +void idr_remove(struct idr *idp, int id) +{ + if (idp->id[id].used) + idp->id[id].used = 0; + + return; +} +void *idr_find(struct idr *idp, int id) +{ + if (idp->id[id].used) + return idp->id[id].ptr; + + return NULL; +} + +void *idr_get_next(struct idr *idp, int *next) +{ + void *ret; + int id = *next; + + ret = idr_find(idp, id); + if (ret) { + id ++; + if (!idp->id[id].used) + id = 0; + *next = id; + } else { + *next = 0; + } + + return ret; +} + +int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask) +{ + struct idr_layer *idl; + int i = 0; + + while (i < MAX_IDR_ID) { + idl = &idp->id[i]; + if (idl->used == 0) { + idl->used = 1; + idl->ptr = ptr; + return i; + } + i++; + } + return -ENOSPC; +} +#endif + +static DEFINE_IDR(mtd_idr); + +/* These are exported solely for the purpose of mtd_blkdevs.c. You + should not use them for _anything_ else */ +DEFINE_MUTEX(mtd_table_mutex); +EXPORT_SYMBOL_GPL(mtd_table_mutex); + +struct mtd_info *__mtd_next_device(int i) +{ + return idr_get_next(&mtd_idr, &i); +} +EXPORT_SYMBOL_GPL(__mtd_next_device); + +#ifndef __UBOOT__ +static LIST_HEAD(mtd_notifiers); + + +#define MTD_DEVT(index) MKDEV(MTD_CHAR_MAJOR, (index)*2) + +/* REVISIT once MTD uses the driver model better, whoever allocates + * the mtd_info will probably want to use the release() hook... + */ +static void mtd_release(struct device *dev) +{ + struct mtd_info __maybe_unused *mtd = dev_get_drvdata(dev); + dev_t index = MTD_DEVT(mtd->index); + + /* remove /dev/mtdXro node if needed */ + if (index) + device_destroy(&mtd_class, index + 1); +} + +static int mtd_cls_suspend(struct device *dev, pm_message_t state) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return mtd ? mtd_suspend(mtd) : 0; +} + +static int mtd_cls_resume(struct device *dev) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + if (mtd) + mtd_resume(mtd); + return 0; +} + +static ssize_t mtd_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + char *type; + + switch (mtd->type) { + case MTD_ABSENT: + type = "absent"; + break; + case MTD_RAM: + type = "ram"; + break; + case MTD_ROM: + type = "rom"; + break; + case MTD_NORFLASH: + type = "nor"; + break; + case MTD_NANDFLASH: + type = "nand"; + break; + case MTD_DATAFLASH: + type = "dataflash"; + break; + case MTD_UBIVOLUME: + type = "ubi"; + break; + case MTD_MLCNANDFLASH: + type = "mlc-nand"; + break; + default: + type = "unknown"; + } + + return snprintf(buf, PAGE_SIZE, "%s\n", type); +} +static DEVICE_ATTR(type, S_IRUGO, mtd_type_show, NULL); + +static ssize_t mtd_flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)mtd->flags); + +} +static DEVICE_ATTR(flags, S_IRUGO, mtd_flags_show, NULL); + +static ssize_t mtd_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)mtd->size); + +} +static DEVICE_ATTR(size, S_IRUGO, mtd_size_show, NULL); + +static ssize_t mtd_erasesize_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->erasesize); + +} +static DEVICE_ATTR(erasesize, S_IRUGO, mtd_erasesize_show, NULL); + +static ssize_t mtd_writesize_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->writesize); + +} +static DEVICE_ATTR(writesize, S_IRUGO, mtd_writesize_show, NULL); + +static ssize_t mtd_subpagesize_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + unsigned int subpagesize = mtd->writesize >> mtd->subpage_sft; + + return snprintf(buf, PAGE_SIZE, "%u\n", subpagesize); + +} +static DEVICE_ATTR(subpagesize, S_IRUGO, mtd_subpagesize_show, NULL); + +static ssize_t mtd_oobsize_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->oobsize); + +} +static DEVICE_ATTR(oobsize, S_IRUGO, mtd_oobsize_show, NULL); + +static ssize_t mtd_numeraseregions_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%u\n", mtd->numeraseregions); + +} +static DEVICE_ATTR(numeraseregions, S_IRUGO, mtd_numeraseregions_show, + NULL); + +static ssize_t mtd_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%s\n", mtd->name); + +} +static DEVICE_ATTR(name, S_IRUGO, mtd_name_show, NULL); + +static ssize_t mtd_ecc_strength_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%u\n", mtd->ecc_strength); +} +static DEVICE_ATTR(ecc_strength, S_IRUGO, mtd_ecc_strength_show, NULL); + +static ssize_t mtd_bitflip_threshold_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%u\n", mtd->bitflip_threshold); +} + +static ssize_t mtd_bitflip_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + unsigned int bitflip_threshold; + int retval; + + retval = kstrtouint(buf, 0, &bitflip_threshold); + if (retval) + return retval; + + mtd->bitflip_threshold = bitflip_threshold; + return count; +} +static DEVICE_ATTR(bitflip_threshold, S_IRUGO | S_IWUSR, + mtd_bitflip_threshold_show, + mtd_bitflip_threshold_store); + +static ssize_t mtd_ecc_step_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mtd_info *mtd = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%u\n", mtd->ecc_step_size); + +} +static DEVICE_ATTR(ecc_step_size, S_IRUGO, mtd_ecc_step_size_show, NULL); + +static struct attribute *mtd_attrs[] = { + &dev_attr_type.attr, + &dev_attr_flags.attr, + &dev_attr_size.attr, + &dev_attr_erasesize.attr, + &dev_attr_writesize.attr, + &dev_attr_subpagesize.attr, + &dev_attr_oobsize.attr, + &dev_attr_numeraseregions.attr, + &dev_attr_name.attr, + &dev_attr_ecc_strength.attr, + &dev_attr_ecc_step_size.attr, + &dev_attr_bitflip_threshold.attr, + NULL, +}; +ATTRIBUTE_GROUPS(mtd); + +static struct device_type mtd_devtype = { + .name = "mtd", + .groups = mtd_groups, + .release = mtd_release, +}; +#endif + +/** + * add_mtd_device - register an MTD device + * @mtd: pointer to new MTD device info structure + * + * Add a device to the list of MTD devices present in the system, and + * notify each currently active MTD 'user' of its arrival. Returns + * zero on success or 1 on failure, which currently will only happen + * if there is insufficient memory or a sysfs error. + */ + int add_mtd_device(struct mtd_info *mtd) { - int i; +#ifndef __UBOOT__ + struct mtd_notifier *not; +#endif + int i, error; + +#ifndef __UBOOT__ + if (!mtd->backing_dev_info) { + switch (mtd->type) { + case MTD_RAM: + mtd->backing_dev_info = &mtd_bdi_rw_mappable; + break; + case MTD_ROM: + mtd->backing_dev_info = &mtd_bdi_ro_mappable; + break; + default: + mtd->backing_dev_info = &mtd_bdi_unmappable; + break; + } + } +#endif BUG_ON(mtd->writesize == 0); + mutex_lock(&mtd_table_mutex); - for (i = 0; i < MAX_MTD_DEVICES; i++) - if (!mtd_table[i]) { - mtd_table[i] = mtd; - mtd->index = i; - mtd->usecount = 0; + i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); + if (i < 0) + goto fail_locked; - /* default value if not set by driver */ - if (mtd->bitflip_threshold == 0) - mtd->bitflip_threshold = mtd->ecc_strength; + mtd->index = i; + mtd->usecount = 0; + /* default value if not set by driver */ + if (mtd->bitflip_threshold == 0) + mtd->bitflip_threshold = mtd->ecc_strength; - /* No need to get a refcount on the module containing - the notifier, since we hold the mtd_table_mutex */ + if (is_power_of_2(mtd->erasesize)) + mtd->erasesize_shift = ffs(mtd->erasesize) - 1; + else + mtd->erasesize_shift = 0; - /* We _know_ we aren't being removed, because - our caller is still holding us here. So none - of this try_ nonsense, and no bitching about it - either. :) */ - return 0; - } + if (is_power_of_2(mtd->writesize)) + mtd->writesize_shift = ffs(mtd->writesize) - 1; + else + mtd->writesize_shift = 0; + + mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1; + mtd->writesize_mask = (1 << mtd->writesize_shift) - 1; + + /* Some chips always power up locked. Unlock them now */ + if ((mtd->flags & MTD_WRITEABLE) && (mtd->flags & MTD_POWERUP_LOCK)) { + error = mtd_unlock(mtd, 0, mtd->size); + if (error && error != -EOPNOTSUPP) + printk(KERN_WARNING + "%s: unlock failed, writes may not work\n", + mtd->name); + } + +#ifndef __UBOOT__ + /* Caller should have set dev.parent to match the + * physical device. + */ + mtd->dev.type = &mtd_devtype; + mtd->dev.class = &mtd_class; + mtd->dev.devt = MTD_DEVT(i); + dev_set_name(&mtd->dev, "mtd%d", i); + dev_set_drvdata(&mtd->dev, mtd); + if (device_register(&mtd->dev) != 0) + goto fail_added; + if (MTD_DEVT(i)) + device_create(&mtd_class, mtd->dev.parent, + MTD_DEVT(i) + 1, + NULL, "mtd%dro", i); + + pr_debug("mtd: Giving out device %d to %s\n", i, mtd->name); + /* No need to get a refcount on the module containing + the notifier, since we hold the mtd_table_mutex */ + list_for_each_entry(not, &mtd_notifiers, list) + not->add(mtd); +#endif + + mutex_unlock(&mtd_table_mutex); + /* We _know_ we aren't being removed, because + our caller is still holding us here. So none + of this try_ nonsense, and no bitching about it + either. :) */ + __module_get(THIS_MODULE); + return 0; + +#ifndef __UBOOT__ +fail_added: + idr_remove(&mtd_idr, i); +#endif +fail_locked: + mutex_unlock(&mtd_table_mutex); return 1; } /** - * del_mtd_device - unregister an MTD device - * @mtd: pointer to MTD device info structure + * del_mtd_device - unregister an MTD device + * @mtd: pointer to MTD device info structure * - * Remove a device from the list of MTD devices present in the system, - * and notify each currently active MTD 'user' of its departure. - * Returns zero on success or 1 on failure, which currently will happen - * if the requested device does not appear to be present in the list. + * Remove a device from the list of MTD devices present in the system, + * and notify each currently active MTD 'user' of its departure. + * Returns zero on success or 1 on failure, which currently will happen + * if the requested device does not appear to be present in the list. */ + int del_mtd_device(struct mtd_info *mtd) { int ret; +#ifndef __UBOOT__ + struct mtd_notifier *not; +#endif + + mutex_lock(&mtd_table_mutex); - if (mtd_table[mtd->index] != mtd) { + if (idr_find(&mtd_idr, mtd->index) != mtd) { ret = -ENODEV; - } else if (mtd->usecount) { - printk(KERN_NOTICE "Removing MTD device #%d (%s)" - " with use count %d\n", - mtd->index, mtd->name, mtd->usecount); + goto out_error; + } + +#ifndef __UBOOT__ + /* No need to get a refcount on the module containing + the notifier, since we hold the mtd_table_mutex */ + list_for_each_entry(not, &mtd_notifiers, list) + not->remove(mtd); +#endif + + if (mtd->usecount) { + printk(KERN_NOTICE "Removing MTD device #%d (%s) with use count %d\n", + mtd->index, mtd->name, mtd->usecount); ret = -EBUSY; } else { - /* No need to get a refcount on the module containing - * the notifier, since we hold the mtd_table_mutex */ - mtd_table[mtd->index] = NULL; +#ifndef __UBOOT__ + device_unregister(&mtd->dev); +#endif + + idr_remove(&mtd_idr, mtd->index); + module_put(THIS_MODULE); ret = 0; } +out_error: + mutex_unlock(&mtd_table_mutex); return ret; } +#ifndef __UBOOT__ +/** + * mtd_device_parse_register - parse partitions and register an MTD device. + * + * @mtd: the MTD device to register + * @types: the list of MTD partition probes to try, see + * 'parse_mtd_partitions()' for more information + * @parser_data: MTD partition parser-specific data + * @parts: fallback partition information to register, if parsing fails; + * only valid if %nr_parts > %0 + * @nr_parts: the number of partitions in parts, if zero then the full + * MTD device is registered if no partition info is found + * + * This function aggregates MTD partitions parsing (done by + * 'parse_mtd_partitions()') and MTD device and partitions registering. It + * basically follows the most common pattern found in many MTD drivers: + * + * * It first tries to probe partitions on MTD device @mtd using parsers + * specified in @types (if @types is %NULL, then the default list of parsers + * is used, see 'parse_mtd_partitions()' for more information). If none are + * found this functions tries to fallback to information specified in + * @parts/@nr_parts. + * * If any partitioning info was found, this function registers the found + * partitions. + * * If no partitions were found this function just registers the MTD device + * @mtd and exits. + * + * Returns zero in case of success and a negative error code in case of failure. + */ +int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, + struct mtd_part_parser_data *parser_data, + const struct mtd_partition *parts, + int nr_parts) +{ + int err; + struct mtd_partition *real_parts; + + err = parse_mtd_partitions(mtd, types, &real_parts, parser_data); + if (err <= 0 && nr_parts && parts) { + real_parts = kmemdup(parts, sizeof(*parts) * nr_parts, + GFP_KERNEL); + if (!real_parts) + err = -ENOMEM; + else + err = nr_parts; + } + + if (err > 0) { + err = add_mtd_partitions(mtd, real_parts, err); + kfree(real_parts); + } else if (err == 0) { + err = add_mtd_device(mtd); + if (err == 1) + err = -ENODEV; + } + + return err; +} +EXPORT_SYMBOL_GPL(mtd_device_parse_register); + +/** + * mtd_device_unregister - unregister an existing MTD device. + * + * @master: the MTD device to unregister. This will unregister both the master + * and any partitions if registered. + */ +int mtd_device_unregister(struct mtd_info *master) +{ + int err; + + err = del_mtd_partitions(master); + if (err) + return err; + + if (!device_is_registered(&master->dev)) + return 0; + + return del_mtd_device(master); +} +EXPORT_SYMBOL_GPL(mtd_device_unregister); + +/** + * register_mtd_user - register a 'user' of MTD devices. + * @new: pointer to notifier info structure + * + * Registers a pair of callbacks function to be called upon addition + * or removal of MTD devices. Causes the 'add' callback to be immediately + * invoked for each MTD device currently present in the system. + */ +void register_mtd_user (struct mtd_notifier *new) +{ + struct mtd_info *mtd; + + mutex_lock(&mtd_table_mutex); + + list_add(&new->list, &mtd_notifiers); + + __module_get(THIS_MODULE); + + mtd_for_each_device(mtd) + new->add(mtd); + + mutex_unlock(&mtd_table_mutex); +} +EXPORT_SYMBOL_GPL(register_mtd_user); + +/** + * unregister_mtd_user - unregister a 'user' of MTD devices. + * @old: pointer to notifier info structure + * + * Removes a callback function pair from the list of 'users' to be + * notified upon addition or removal of MTD devices. Causes the + * 'remove' callback to be immediately invoked for each MTD device + * currently present in the system. + */ +int unregister_mtd_user (struct mtd_notifier *old) +{ + struct mtd_info *mtd; + + mutex_lock(&mtd_table_mutex); + + module_put(THIS_MODULE); + + mtd_for_each_device(mtd) + old->remove(mtd); + + list_del(&old->list); + mutex_unlock(&mtd_table_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(unregister_mtd_user); +#endif + /** * get_mtd_device - obtain a validated handle for an MTD device * @mtd: last known address of the required MTD device * @num: internal device number of the required MTD device * * Given a number and NULL address, return the num'th entry in the device - * table, if any. Given an address and num == -1, search the device table - * for a device with that address and return if it's still present. Given - * both, return the num'th driver only if its address matches. Return - * error code if not. + * table, if any. Given an address and num == -1, search the device table + * for a device with that address and return if it's still present. Given + * both, return the num'th driver only if its address matches. Return + * error code if not. */ struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num) { - struct mtd_info *ret = NULL; - int i, err = -ENODEV; + struct mtd_info *ret = NULL, *other; + int err = -ENODEV; + + mutex_lock(&mtd_table_mutex); if (num == -1) { - for (i = 0; i < MAX_MTD_DEVICES; i++) - if (mtd_table[i] == mtd) - ret = mtd_table[i]; - } else if (num < MAX_MTD_DEVICES) { - ret = mtd_table[num]; + mtd_for_each_device(other) { + if (other == mtd) { + ret = mtd; + break; + } + } + } else if (num >= 0) { + ret = idr_find(&mtd_idr, num); if (mtd && mtd != ret) ret = NULL; } - if (!ret) - goto out_unlock; + if (!ret) { + ret = ERR_PTR(err); + goto out; + } - ret->usecount++; + err = __get_mtd_device(ret); + if (err) + ret = ERR_PTR(err); +out: + mutex_unlock(&mtd_table_mutex); return ret; +} +EXPORT_SYMBOL_GPL(get_mtd_device); -out_unlock: - return ERR_PTR(err); + +int __get_mtd_device(struct mtd_info *mtd) +{ + int err; + + if (!try_module_get(mtd->owner)) + return -ENODEV; + + if (mtd->_get_device) { + err = mtd->_get_device(mtd); + + if (err) { + module_put(mtd->owner); + return err; + } + } + mtd->usecount++; + return 0; } +EXPORT_SYMBOL_GPL(__get_mtd_device); /** - * get_mtd_device_nm - obtain a validated handle for an MTD device by - * device name - * @name: MTD device name to open + * get_mtd_device_nm - obtain a validated handle for an MTD device by + * device name + * @name: MTD device name to open * - * This function returns MTD device description structure in case of - * success and an error code in case of failure. + * This function returns MTD device description structure in case of + * success and an error code in case of failure. */ struct mtd_info *get_mtd_device_nm(const char *name) { - int i, err = -ENODEV; - struct mtd_info *mtd = NULL; + int err = -ENODEV; + struct mtd_info *mtd = NULL, *other; + + mutex_lock(&mtd_table_mutex); - for (i = 0; i < MAX_MTD_DEVICES; i++) { - if (mtd_table[i] && !strcmp(name, mtd_table[i]->name)) { - mtd = mtd_table[i]; + mtd_for_each_device(other) { + if (!strcmp(name, other->name)) { + mtd = other; break; } } @@ -133,20 +770,18 @@ struct mtd_info *get_mtd_device_nm(const char *name) if (!mtd) goto out_unlock; - mtd->usecount++; + err = __get_mtd_device(mtd); + if (err) + goto out_unlock; + + mutex_unlock(&mtd_table_mutex); return mtd; out_unlock: + mutex_unlock(&mtd_table_mutex); return ERR_PTR(err); } - -void put_mtd_device(struct mtd_info *mtd) -{ - int c; - - c = --mtd->usecount; - BUG_ON(c < 0); -} +EXPORT_SYMBOL_GPL(get_mtd_device_nm); #if defined(CONFIG_CMD_MTDPARTS_SPREAD) /** @@ -192,7 +827,28 @@ void mtd_get_len_incl_bad(struct mtd_info *mtd, uint64_t offset, } #endif /* defined(CONFIG_CMD_MTDPARTS_SPREAD) */ - /* +void put_mtd_device(struct mtd_info *mtd) +{ + mutex_lock(&mtd_table_mutex); + __put_mtd_device(mtd); + mutex_unlock(&mtd_table_mutex); + +} +EXPORT_SYMBOL_GPL(put_mtd_device); + +void __put_mtd_device(struct mtd_info *mtd) +{ + --mtd->usecount; + BUG_ON(mtd->usecount < 0); + + if (mtd->_put_device) + mtd->_put_device(mtd); + + module_put(mtd->owner); +} +EXPORT_SYMBOL_GPL(__put_mtd_device); + +/* * Erase is an asynchronous operation. Device drivers are supposed * to call instr->callback() whenever the operation completes, even * if it completes with a failure. @@ -213,11 +869,64 @@ int mtd_erase(struct mtd_info *mtd, struct erase_info *instr) } return mtd->_erase(mtd, instr); } +EXPORT_SYMBOL_GPL(mtd_erase); + +#ifndef __UBOOT__ +/* + * This stuff for eXecute-In-Place. phys is optional and may be set to NULL. + */ +int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, + void **virt, resource_size_t *phys) +{ + *retlen = 0; + *virt = NULL; + if (phys) + *phys = 0; + if (!mtd->_point) + return -EOPNOTSUPP; + if (from < 0 || from > mtd->size || len > mtd->size - from) + return -EINVAL; + if (!len) + return 0; + return mtd->_point(mtd, from, len, retlen, virt, phys); +} +EXPORT_SYMBOL_GPL(mtd_point); + +/* We probably shouldn't allow XIP if the unpoint isn't a NULL */ +int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len) +{ + if (!mtd->_point) + return -EOPNOTSUPP; + if (from < 0 || from > mtd->size || len > mtd->size - from) + return -EINVAL; + if (!len) + return 0; + return mtd->_unpoint(mtd, from, len); +} +EXPORT_SYMBOL_GPL(mtd_unpoint); +#endif + +/* + * Allow NOMMU mmap() to directly map the device (if not NULL) + * - return the address to which the offset maps + * - return -ENOSYS to indicate refusal to do the mapping + */ +unsigned long mtd_get_unmapped_area(struct mtd_info *mtd, unsigned long len, + unsigned long offset, unsigned long flags) +{ + if (!mtd->_get_unmapped_area) + return -EOPNOTSUPP; + if (offset > mtd->size || len > mtd->size - offset) + return -EINVAL; + return mtd->_get_unmapped_area(mtd, len, offset, flags); +} +EXPORT_SYMBOL_GPL(mtd_get_unmapped_area); int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { int ret_code; + *retlen = 0; if (from < 0 || from > mtd->size || len > mtd->size - from) return -EINVAL; if (!len) @@ -235,6 +944,7 @@ int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, return 0; /* device lacks ecc */ return ret_code >= mtd->bitflip_threshold ? -EUCLEAN : 0; } +EXPORT_SYMBOL_GPL(mtd_read); int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) @@ -248,6 +958,7 @@ int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, return 0; return mtd->_write(mtd, to, len, retlen, buf); } +EXPORT_SYMBOL_GPL(mtd_write); /* * In blackbox flight recorder like scenarios we want to make successful writes @@ -270,14 +981,28 @@ int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, return 0; return mtd->_panic_write(mtd, to, len, retlen, buf); } +EXPORT_SYMBOL_GPL(mtd_panic_write); int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { + int ret_code; ops->retlen = ops->oobretlen = 0; if (!mtd->_read_oob) return -EOPNOTSUPP; - return mtd->_read_oob(mtd, from, ops); + /* + * In cases where ops->datbuf != NULL, mtd->_read_oob() has semantics + * similar to mtd->_read(), returning a non-negative integer + * representing max bitflips. In other cases, mtd->_read_oob() may + * return -EUCLEAN. In all cases, perform similar logic to mtd_read(). + */ + ret_code = mtd->_read_oob(mtd, from, ops); + if (unlikely(ret_code < 0)) + return ret_code; + if (mtd->ecc_strength == 0) + return 0; /* device lacks ecc */ + return ret_code >= mtd->bitflip_threshold ? -EUCLEAN : 0; } +EXPORT_SYMBOL_GPL(mtd_read_oob); /* * Method to access the protection register area, present in some flash @@ -293,6 +1018,7 @@ int mtd_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf, return 0; return mtd->_get_fact_prot_info(mtd, buf, len); } +EXPORT_SYMBOL_GPL(mtd_get_fact_prot_info); int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) @@ -304,6 +1030,7 @@ int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, return 0; return mtd->_read_fact_prot_reg(mtd, from, len, retlen, buf); } +EXPORT_SYMBOL_GPL(mtd_read_fact_prot_reg); int mtd_get_user_prot_info(struct mtd_info *mtd, struct otp_info *buf, size_t len) @@ -314,6 +1041,7 @@ int mtd_get_user_prot_info(struct mtd_info *mtd, struct otp_info *buf, return 0; return mtd->_get_user_prot_info(mtd, buf, len); } +EXPORT_SYMBOL_GPL(mtd_get_user_prot_info); int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) @@ -325,6 +1053,7 @@ int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, return 0; return mtd->_read_user_prot_reg(mtd, from, len, retlen, buf); } +EXPORT_SYMBOL_GPL(mtd_read_user_prot_reg); int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, u_char *buf) @@ -336,6 +1065,7 @@ int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len, return 0; return mtd->_write_user_prot_reg(mtd, to, len, retlen, buf); } +EXPORT_SYMBOL_GPL(mtd_write_user_prot_reg); int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len) { @@ -345,6 +1075,7 @@ int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len) return 0; return mtd->_lock_user_prot_reg(mtd, from, len); } +EXPORT_SYMBOL_GPL(mtd_lock_user_prot_reg); /* Chip-supported device locking */ int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) @@ -357,6 +1088,7 @@ int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) return 0; return mtd->_lock(mtd, ofs, len); } +EXPORT_SYMBOL_GPL(mtd_lock); int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { @@ -368,6 +1100,19 @@ int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) return 0; return mtd->_unlock(mtd, ofs, len); } +EXPORT_SYMBOL_GPL(mtd_unlock); + +int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + if (!mtd->_is_locked) + return -EOPNOTSUPP; + if (ofs < 0 || ofs > mtd->size || len > mtd->size - ofs) + return -EINVAL; + if (!len) + return 0; + return mtd->_is_locked(mtd, ofs, len); +} +EXPORT_SYMBOL_GPL(mtd_is_locked); int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) { @@ -377,6 +1122,7 @@ int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) return -EINVAL; return mtd->_block_isbad(mtd, ofs); } +EXPORT_SYMBOL_GPL(mtd_block_isbad); int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs) { @@ -388,3 +1134,225 @@ int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs) return -EROFS; return mtd->_block_markbad(mtd, ofs); } +EXPORT_SYMBOL_GPL(mtd_block_markbad); + +#ifndef __UBOOT__ +/* + * default_mtd_writev - the default writev method + * @mtd: mtd device description object pointer + * @vecs: the vectors to write + * @count: count of vectors in @vecs + * @to: the MTD device offset to write to + * @retlen: on exit contains the count of bytes written to the MTD device. + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, + unsigned long count, loff_t to, size_t *retlen) +{ + unsigned long i; + size_t totlen = 0, thislen; + int ret = 0; + + for (i = 0; i < count; i++) { + if (!vecs[i].iov_len) + continue; + ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen, + vecs[i].iov_base); + totlen += thislen; + if (ret || thislen != vecs[i].iov_len) + break; + to += vecs[i].iov_len; + } + *retlen = totlen; + return ret; +} + +/* + * mtd_writev - the vector-based MTD write method + * @mtd: mtd device description object pointer + * @vecs: the vectors to write + * @count: count of vectors in @vecs + * @to: the MTD device offset to write to + * @retlen: on exit contains the count of bytes written to the MTD device. + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, + unsigned long count, loff_t to, size_t *retlen) +{ + *retlen = 0; + if (!(mtd->flags & MTD_WRITEABLE)) + return -EROFS; + if (!mtd->_writev) + return default_mtd_writev(mtd, vecs, count, to, retlen); + return mtd->_writev(mtd, vecs, count, to, retlen); +} +EXPORT_SYMBOL_GPL(mtd_writev); + +/** + * mtd_kmalloc_up_to - allocate a contiguous buffer up to the specified size + * @mtd: mtd device description object pointer + * @size: a pointer to the ideal or maximum size of the allocation, points + * to the actual allocation size on success. + * + * This routine attempts to allocate a contiguous kernel buffer up to + * the specified size, backing off the size of the request exponentially + * until the request succeeds or until the allocation size falls below + * the system page size. This attempts to make sure it does not adversely + * impact system performance, so when allocating more than one page, we + * ask the memory allocator to avoid re-trying, swapping, writing back + * or performing I/O. + * + * Note, this function also makes sure that the allocated buffer is aligned to + * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value. + * + * This is called, for example by mtd_{read,write} and jffs2_scan_medium, + * to handle smaller (i.e. degraded) buffer allocations under low- or + * fragmented-memory situations where such reduced allocations, from a + * requested ideal, are allowed. + * + * Returns a pointer to the allocated buffer on success; otherwise, NULL. + */ +void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) +{ + gfp_t flags = __GFP_NOWARN | __GFP_WAIT | + __GFP_NORETRY | __GFP_NO_KSWAPD; + size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); + void *kbuf; + + *size = min_t(size_t, *size, KMALLOC_MAX_SIZE); + + while (*size > min_alloc) { + kbuf = kmalloc(*size, flags); + if (kbuf) + return kbuf; + + *size >>= 1; + *size = ALIGN(*size, mtd->writesize); + } + + /* + * For the last resort allocation allow 'kmalloc()' to do all sorts of + * things (write-back, dropping caches, etc) by using GFP_KERNEL. + */ + return kmalloc(*size, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(mtd_kmalloc_up_to); +#endif + +#ifdef CONFIG_PROC_FS + +/*====================================================================*/ +/* Support for /proc/mtd */ + +static int mtd_proc_show(struct seq_file *m, void *v) +{ + struct mtd_info *mtd; + + seq_puts(m, "dev: size erasesize name\n"); + mutex_lock(&mtd_table_mutex); + mtd_for_each_device(mtd) { + seq_printf(m, "mtd%d: %8.8llx %8.8x \"%s\"\n", + mtd->index, (unsigned long long)mtd->size, + mtd->erasesize, mtd->name); + } + mutex_unlock(&mtd_table_mutex); + return 0; +} + +static int mtd_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, mtd_proc_show, NULL); +} + +static const struct file_operations mtd_proc_ops = { + .open = mtd_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* CONFIG_PROC_FS */ + +/*====================================================================*/ +/* Init code */ + +#ifndef __UBOOT__ +static int __init mtd_bdi_init(struct backing_dev_info *bdi, const char *name) +{ + int ret; + + ret = bdi_init(bdi); + if (!ret) + ret = bdi_register(bdi, NULL, "%s", name); + + if (ret) + bdi_destroy(bdi); + + return ret; +} + +static struct proc_dir_entry *proc_mtd; + +static int __init init_mtd(void) +{ + int ret; + + ret = class_register(&mtd_class); + if (ret) + goto err_reg; + + ret = mtd_bdi_init(&mtd_bdi_unmappable, "mtd-unmap"); + if (ret) + goto err_bdi1; + + ret = mtd_bdi_init(&mtd_bdi_ro_mappable, "mtd-romap"); + if (ret) + goto err_bdi2; + + ret = mtd_bdi_init(&mtd_bdi_rw_mappable, "mtd-rwmap"); + if (ret) + goto err_bdi3; + + proc_mtd = proc_create("mtd", 0, NULL, &mtd_proc_ops); + + ret = init_mtdchar(); + if (ret) + goto out_procfs; + + return 0; + +out_procfs: + if (proc_mtd) + remove_proc_entry("mtd", NULL); +err_bdi3: + bdi_destroy(&mtd_bdi_ro_mappable); +err_bdi2: + bdi_destroy(&mtd_bdi_unmappable); +err_bdi1: + class_unregister(&mtd_class); +err_reg: + pr_err("Error registering mtd class or bdi: %d\n", ret); + return ret; +} + +static void __exit cleanup_mtd(void) +{ + cleanup_mtdchar(); + if (proc_mtd) + remove_proc_entry("mtd", NULL); + class_unregister(&mtd_class); + bdi_destroy(&mtd_bdi_unmappable); + bdi_destroy(&mtd_bdi_ro_mappable); + bdi_destroy(&mtd_bdi_rw_mappable); +} + +module_init(init_mtd); +module_exit(cleanup_mtd); +#endif + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>"); +MODULE_DESCRIPTION("Core MTD registration and access routines"); diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h new file mode 100644 index 0000000..7b03533 --- /dev/null +++ b/drivers/mtd/mtdcore.h @@ -0,0 +1,23 @@ +/* + * These are exported solely for the purpose of mtd_blkdevs.c and mtdchar.c. + * You should not use them for _anything_ else. + */ + +extern struct mutex mtd_table_mutex; + +struct mtd_info *__mtd_next_device(int i); +int add_mtd_device(struct mtd_info *mtd); +int del_mtd_device(struct mtd_info *mtd); +int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); +int del_mtd_partitions(struct mtd_info *); +int parse_mtd_partitions(struct mtd_info *master, const char * const *types, + struct mtd_partition **pparts, + struct mtd_part_parser_data *data); + +int __init init_mtdchar(void); +void __exit cleanup_mtdchar(void); + +#define mtd_for_each_device(mtd) \ + for ((mtd) = __mtd_next_device(0); \ + (mtd) != NULL; \ + (mtd) = __mtd_next_device(mtd->index + 1)) diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 146ce11..d20b857 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -1,35 +1,50 @@ /* * Simple MTD partitioning layer * - * (C) 2000 Nicolas Pitre <nico@cam.org> + * Copyright © 2000 Nicolas Pitre <nico@fluxnic.net> + * Copyright © 2002 Thomas Gleixner <gleixner@linutronix.de> + * Copyright © 2000-2010 David Woodhouse <dwmw2@infradead.org> * - * This code is GPL + * SPDX-License-Identifier: GPL-2.0+ * - * 02-21-2002 Thomas Gleixner <gleixner@autronix.de> - * added support for read_oob, write_oob */ +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/kmod.h> +#endif + #include <common.h> #include <malloc.h> #include <asm/errno.h> +#include <linux/compat.h> +#include <ubi_uboot.h> -#include <linux/types.h> -#include <linux/list.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> -#include <linux/compat.h> +#include <linux/err.h> + +#include "mtdcore.h" /* Our partition linked list */ -struct list_head mtd_partitions; +static LIST_HEAD(mtd_partitions); +#ifndef __UBOOT__ +static DEFINE_MUTEX(mtd_partitions_mutex); +#else +DEFINE_MUTEX(mtd_partitions_mutex); +#endif /* Our partition node structure */ struct mtd_part { struct mtd_info mtd; struct mtd_info *master; uint64_t offset; - int index; struct list_head list; - int registered; }; /* @@ -39,6 +54,30 @@ struct mtd_part { #define PART(x) ((struct mtd_part *)(x)) +#ifdef __UBOOT__ +/* from mm/util.c */ + +/** + * kstrdup - allocate space for and copy an existing string + * @s: the string to duplicate + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + */ +char *kstrdup(const char *s, gfp_t gfp) +{ + size_t len; + char *buf; + + if (!s) + return NULL; + + len = strlen(s) + 1; + buf = kmalloc(len, gfp); + if (buf) + memcpy(buf, s, len); + return buf; +} +#endif + /* * MTD methods which simply translate the effective address and pass through * to the _real_ device. @@ -52,7 +91,8 @@ static int part_read(struct mtd_info *mtd, loff_t from, size_t len, int res; stats = part->master->ecc_stats; - res = mtd_read(part->master, from + part->offset, len, retlen, buf); + res = part->master->_read(part->master, from + part->offset, len, + retlen, buf); if (unlikely(mtd_is_eccerr(res))) mtd->ecc_stats.failed += part->master->ecc_stats.failed - stats.failed; @@ -62,6 +102,36 @@ static int part_read(struct mtd_info *mtd, loff_t from, size_t len, return res; } +#ifndef __UBOOT__ +static int part_point(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, void **virt, resource_size_t *phys) +{ + struct mtd_part *part = PART(mtd); + + return part->master->_point(part->master, from + part->offset, len, + retlen, virt, phys); +} + +static int part_unpoint(struct mtd_info *mtd, loff_t from, size_t len) +{ + struct mtd_part *part = PART(mtd); + + return part->master->_unpoint(part->master, from + part->offset, len); +} +#endif + +static unsigned long part_get_unmapped_area(struct mtd_info *mtd, + unsigned long len, + unsigned long offset, + unsigned long flags) +{ + struct mtd_part *part = PART(mtd); + + offset += part->offset; + return part->master->_get_unmapped_area(part->master, len, offset, + flags); +} + static int part_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { @@ -72,8 +142,25 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from, return -EINVAL; if (ops->datbuf && from + ops->len > mtd->size) return -EINVAL; - res = mtd_read_oob(part->master, from + part->offset, ops); + /* + * If OOB is also requested, make sure that we do not read past the end + * of this partition. + */ + if (ops->oobbuf) { + size_t len, pages; + + if (ops->mode == MTD_OPS_AUTO_OOB) + len = mtd->oobavail; + else + len = mtd->oobsize; + pages = mtd_div_by_ws(mtd->size, mtd); + pages -= mtd_div_by_ws(from, mtd); + if (ops->ooboffs + ops->ooblen > pages * len) + return -EINVAL; + } + + res = part->master->_read_oob(part->master, from + part->offset, ops); if (unlikely(res)) { if (mtd_is_bitflip(res)) mtd->ecc_stats.corrected++; @@ -87,35 +174,46 @@ static int part_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct mtd_part *part = PART(mtd); - return mtd_read_user_prot_reg(part->master, from, len, retlen, buf); + return part->master->_read_user_prot_reg(part->master, from, len, + retlen, buf); } static int part_get_user_prot_info(struct mtd_info *mtd, struct otp_info *buf, size_t len) { struct mtd_part *part = PART(mtd); - return mtd_get_user_prot_info(part->master, buf, len); + return part->master->_get_user_prot_info(part->master, buf, len); } static int part_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct mtd_part *part = PART(mtd); - return mtd_read_fact_prot_reg(part->master, from, len, retlen, buf); + return part->master->_read_fact_prot_reg(part->master, from, len, + retlen, buf); } static int part_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf, size_t len) { struct mtd_part *part = PART(mtd); - return mtd_get_fact_prot_info(part->master, buf, len); + return part->master->_get_fact_prot_info(part->master, buf, len); } static int part_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { struct mtd_part *part = PART(mtd); - return mtd_write(part->master, to + part->offset, len, retlen, buf); + return part->master->_write(part->master, to + part->offset, len, + retlen, buf); +} + +static int part_panic_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf) +{ + struct mtd_part *part = PART(mtd); + return part->master->_panic_write(part->master, to + part->offset, len, + retlen, buf); } static int part_write_oob(struct mtd_info *mtd, loff_t to, @@ -127,30 +225,41 @@ static int part_write_oob(struct mtd_info *mtd, loff_t to, return -EINVAL; if (ops->datbuf && to + ops->len > mtd->size) return -EINVAL; - return mtd_write_oob(part->master, to + part->offset, ops); + return part->master->_write_oob(part->master, to + part->offset, ops); } static int part_write_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct mtd_part *part = PART(mtd); - return mtd_write_user_prot_reg(part->master, from, len, retlen, buf); + return part->master->_write_user_prot_reg(part->master, from, len, + retlen, buf); } static int part_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len) { struct mtd_part *part = PART(mtd); - return mtd_lock_user_prot_reg(part->master, from, len); + return part->master->_lock_user_prot_reg(part->master, from, len); } +#ifndef __UBOOT__ +static int part_writev(struct mtd_info *mtd, const struct kvec *vecs, + unsigned long count, loff_t to, size_t *retlen) +{ + struct mtd_part *part = PART(mtd); + return part->master->_writev(part->master, vecs, count, + to + part->offset, retlen); +} +#endif + static int part_erase(struct mtd_info *mtd, struct erase_info *instr) { struct mtd_part *part = PART(mtd); int ret; instr->addr += part->offset; - ret = mtd_erase(part->master, instr); + ret = part->master->_erase(part->master, instr); if (ret) { if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN) instr->fail_addr -= part->offset; @@ -171,30 +280,51 @@ void mtd_erase_callback(struct erase_info *instr) if (instr->callback) instr->callback(instr); } +EXPORT_SYMBOL_GPL(mtd_erase_callback); static int part_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct mtd_part *part = PART(mtd); - return mtd_lock(part->master, ofs + part->offset, len); + return part->master->_lock(part->master, ofs + part->offset, len); } static int part_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct mtd_part *part = PART(mtd); - return mtd_unlock(part->master, ofs + part->offset, len); + return part->master->_unlock(part->master, ofs + part->offset, len); +} + +static int part_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + struct mtd_part *part = PART(mtd); + return part->master->_is_locked(part->master, ofs + part->offset, len); } static void part_sync(struct mtd_info *mtd) { struct mtd_part *part = PART(mtd); - mtd_sync(part->master); + part->master->_sync(part->master); } +#ifndef __UBOOT__ +static int part_suspend(struct mtd_info *mtd) +{ + struct mtd_part *part = PART(mtd); + return part->master->_suspend(part->master); +} + +static void part_resume(struct mtd_info *mtd) +{ + struct mtd_part *part = PART(mtd); + part->master->_resume(part->master); +} +#endif + static int part_block_isbad(struct mtd_info *mtd, loff_t ofs) { struct mtd_part *part = PART(mtd); ofs += part->offset; - return mtd_block_isbad(part->master, ofs); + return part->master->_block_isbad(part->master, ofs); } static int part_block_markbad(struct mtd_info *mtd, loff_t ofs) @@ -203,12 +333,18 @@ static int part_block_markbad(struct mtd_info *mtd, loff_t ofs) int res; ofs += part->offset; - res = mtd_block_markbad(part->master, ofs); + res = part->master->_block_markbad(part->master, ofs); if (!res) mtd->ecc_stats.badblocks++; return res; } +static inline void free_partition(struct mtd_part *p) +{ + kfree(p->mtd.name); + kfree(p); +} + /* * This function unregisters and destroy all slave MTD objects which are * attached to the given master MTD object. @@ -217,49 +353,78 @@ static int part_block_markbad(struct mtd_info *mtd, loff_t ofs) int del_mtd_partitions(struct mtd_info *master) { struct mtd_part *slave, *next; + int ret, err = 0; + mutex_lock(&mtd_partitions_mutex); list_for_each_entry_safe(slave, next, &mtd_partitions, list) if (slave->master == master) { + ret = del_mtd_device(&slave->mtd); + if (ret < 0) { + err = ret; + continue; + } list_del(&slave->list); - if (slave->registered) - del_mtd_device(&slave->mtd); - kfree(slave); + free_partition(slave); } + mutex_unlock(&mtd_partitions_mutex); - return 0; + return err; } -static struct mtd_part *add_one_partition(struct mtd_info *master, - const struct mtd_partition *part, int partno, - uint64_t cur_offset) +static struct mtd_part *allocate_partition(struct mtd_info *master, + const struct mtd_partition *part, int partno, + uint64_t cur_offset) { struct mtd_part *slave; + char *name; /* allocate the partition structure */ slave = kzalloc(sizeof(*slave), GFP_KERNEL); - if (!slave) { + name = kstrdup(part->name, GFP_KERNEL); + if (!name || !slave) { printk(KERN_ERR"memory allocation error while creating partitions for \"%s\"\n", - master->name); - del_mtd_partitions(master); - return NULL; + master->name); + kfree(name); + kfree(slave); + return ERR_PTR(-ENOMEM); } - list_add(&slave->list, &mtd_partitions); /* set up the MTD object for this partition */ slave->mtd.type = master->type; slave->mtd.flags = master->flags & ~part->mask_flags; slave->mtd.size = part->size; slave->mtd.writesize = master->writesize; + slave->mtd.writebufsize = master->writebufsize; slave->mtd.oobsize = master->oobsize; slave->mtd.oobavail = master->oobavail; slave->mtd.subpage_sft = master->subpage_sft; - slave->mtd.name = part->name; + slave->mtd.name = name; slave->mtd.owner = master->owner; +#ifndef __UBOOT__ + slave->mtd.backing_dev_info = master->backing_dev_info; + + /* NOTE: we don't arrange MTDs as a tree; it'd be error-prone + * to have the same data be in two different partitions. + */ + slave->mtd.dev.parent = master->dev.parent; +#endif slave->mtd._read = part_read; slave->mtd._write = part_write; + if (master->_panic_write) + slave->mtd._panic_write = part_panic_write; + +#ifndef __UBOOT__ + if (master->_point && master->_unpoint) { + slave->mtd._point = part_point; + slave->mtd._unpoint = part_unpoint; + } +#endif + + if (master->_get_unmapped_area) + slave->mtd._get_unmapped_area = part_get_unmapped_area; if (master->_read_oob) slave->mtd._read_oob = part_read_oob; if (master->_write_oob) @@ -278,10 +443,21 @@ static struct mtd_part *add_one_partition(struct mtd_info *master, slave->mtd._get_fact_prot_info = part_get_fact_prot_info; if (master->_sync) slave->mtd._sync = part_sync; +#ifndef __UBOOT__ + if (!partno && !master->dev.class && master->_suspend && + master->_resume) { + slave->mtd._suspend = part_suspend; + slave->mtd._resume = part_resume; + } + if (master->_writev) + slave->mtd._writev = part_writev; +#endif if (master->_lock) slave->mtd._lock = part_lock; if (master->_unlock) slave->mtd._unlock = part_unlock; + if (master->_is_locked) + slave->mtd._is_locked = part_is_locked; if (master->_block_isbad) slave->mtd._block_isbad = part_block_isbad; if (master->_block_markbad) @@ -289,7 +465,6 @@ static struct mtd_part *add_one_partition(struct mtd_info *master, slave->mtd._erase = part_erase; slave->master = master; slave->offset = part->offset; - slave->index = partno; if (slave->offset == MTDPART_OFS_APPEND) slave->offset = cur_offset; @@ -298,18 +473,29 @@ static struct mtd_part *add_one_partition(struct mtd_info *master, if (mtd_mod_by_eb(cur_offset, master) != 0) { /* Round up to next erasesize */ slave->offset = (mtd_div_by_eb(cur_offset, master) + 1) * master->erasesize; - debug("Moving partition %d: 0x%012llx -> 0x%012llx\n", - partno, (unsigned long long)cur_offset, - (unsigned long long)slave->offset); + debug("Moving partition %d: " + "0x%012llx -> 0x%012llx\n", partno, + (unsigned long long)cur_offset, (unsigned long long)slave->offset); + } + } + if (slave->offset == MTDPART_OFS_RETAIN) { + slave->offset = cur_offset; + if (master->size - slave->offset >= slave->mtd.size) { + slave->mtd.size = master->size - slave->offset + - slave->mtd.size; + } else { + debug("mtd partition \"%s\" doesn't have enough space: %#llx < %#llx, disabled\n", + part->name, master->size - slave->offset, + slave->mtd.size); + /* register to preserve ordering */ + goto out_register; } } if (slave->mtd.size == MTDPART_SIZ_FULL) slave->mtd.size = master->size - slave->offset; - debug("0x%012llx-0x%012llx : \"%s\"\n", - (unsigned long long)slave->offset, - (unsigned long long)(slave->offset + slave->mtd.size), - slave->mtd.name); + debug("0x%012llx-0x%012llx : \"%s\"\n", (unsigned long long)slave->offset, + (unsigned long long)(slave->offset + slave->mtd.size), slave->mtd.name); /* let's do some sanity checks */ if (slave->offset >= master->size) { @@ -336,7 +522,8 @@ static struct mtd_part *add_one_partition(struct mtd_info *master, for (i = 0; i < max && regions[i].offset <= slave->offset; i++) ; /* The loop searched for the region _behind_ the first one */ - i--; + if (i > 0) + i--; /* Pick biggest erasesize */ for (; i < max && regions[i].offset < end; i++) { @@ -367,6 +554,10 @@ static struct mtd_part *add_one_partition(struct mtd_info *master, } slave->mtd.ecclayout = master->ecclayout; + slave->mtd.ecc_step_size = master->ecc_step_size; + slave->mtd.ecc_strength = master->ecc_strength; + slave->mtd.bitflip_threshold = master->bitflip_threshold; + if (master->_block_isbad) { uint64_t offs = 0; @@ -378,18 +569,89 @@ static struct mtd_part *add_one_partition(struct mtd_info *master, } out_register: - if (part->mtdp) { - /* store the object pointer (caller may or may not register it*/ - *part->mtdp = &slave->mtd; - slave->registered = 0; - } else { - /* register our partition */ - add_mtd_device(&slave->mtd); - slave->registered = 1; - } return slave; } +int mtd_add_partition(struct mtd_info *master, const char *name, + long long offset, long long length) +{ + struct mtd_partition part; + struct mtd_part *p, *new; + uint64_t start, end; + int ret = 0; + + /* the direct offset is expected */ + if (offset == MTDPART_OFS_APPEND || + offset == MTDPART_OFS_NXTBLK) + return -EINVAL; + + if (length == MTDPART_SIZ_FULL) + length = master->size - offset; + + if (length <= 0) + return -EINVAL; + + part.name = name; + part.size = length; + part.offset = offset; + part.mask_flags = 0; + part.ecclayout = NULL; + + new = allocate_partition(master, &part, -1, offset); + if (IS_ERR(new)) + return PTR_ERR(new); + + start = offset; + end = offset + length; + + mutex_lock(&mtd_partitions_mutex); + list_for_each_entry(p, &mtd_partitions, list) + if (p->master == master) { + if ((start >= p->offset) && + (start < (p->offset + p->mtd.size))) + goto err_inv; + + if ((end >= p->offset) && + (end < (p->offset + p->mtd.size))) + goto err_inv; + } + + list_add(&new->list, &mtd_partitions); + mutex_unlock(&mtd_partitions_mutex); + + add_mtd_device(&new->mtd); + + return ret; +err_inv: + mutex_unlock(&mtd_partitions_mutex); + free_partition(new); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(mtd_add_partition); + +int mtd_del_partition(struct mtd_info *master, int partno) +{ + struct mtd_part *slave, *next; + int ret = -EINVAL; + + mutex_lock(&mtd_partitions_mutex); + list_for_each_entry_safe(slave, next, &mtd_partitions, list) + if ((slave->master == master) && + (slave->mtd.index == partno)) { + ret = del_mtd_device(&slave->mtd); + if (ret < 0) + break; + + list_del(&slave->list); + free_partition(slave); + break; + } + mutex_unlock(&mtd_partitions_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mtd_del_partition); + /* * This function, given a master MTD object and a partition table, creates * and registers slave MTD objects which are bound to the master according to @@ -407,6 +669,7 @@ int add_mtd_partitions(struct mtd_info *master, uint64_t cur_offset = 0; int i; +#ifdef __UBOOT__ /* * Need to init the list here, since LIST_INIT() does not * work on platforms where relocation has problems (like MIPS @@ -414,15 +677,147 @@ int add_mtd_partitions(struct mtd_info *master, */ if (mtd_partitions.next == NULL) INIT_LIST_HEAD(&mtd_partitions); +#endif debug("Creating %d MTD partitions on \"%s\":\n", nbparts, master->name); for (i = 0; i < nbparts; i++) { - slave = add_one_partition(master, parts + i, i, cur_offset); - if (!slave) - return -ENOMEM; + slave = allocate_partition(master, parts + i, i, cur_offset); + if (IS_ERR(slave)) + return PTR_ERR(slave); + + mutex_lock(&mtd_partitions_mutex); + list_add(&slave->list, &mtd_partitions); + mutex_unlock(&mtd_partitions_mutex); + + add_mtd_device(&slave->mtd); + cur_offset = slave->offset + slave->mtd.size; } return 0; } + +#ifndef __UBOOT__ +static DEFINE_SPINLOCK(part_parser_lock); +static LIST_HEAD(part_parsers); + +static struct mtd_part_parser *get_partition_parser(const char *name) +{ + struct mtd_part_parser *p, *ret = NULL; + + spin_lock(&part_parser_lock); + + list_for_each_entry(p, &part_parsers, list) + if (!strcmp(p->name, name) && try_module_get(p->owner)) { + ret = p; + break; + } + + spin_unlock(&part_parser_lock); + + return ret; +} + +#define put_partition_parser(p) do { module_put((p)->owner); } while (0) + +void register_mtd_parser(struct mtd_part_parser *p) +{ + spin_lock(&part_parser_lock); + list_add(&p->list, &part_parsers); + spin_unlock(&part_parser_lock); +} +EXPORT_SYMBOL_GPL(register_mtd_parser); + +void deregister_mtd_parser(struct mtd_part_parser *p) +{ + spin_lock(&part_parser_lock); + list_del(&p->list); + spin_unlock(&part_parser_lock); +} +EXPORT_SYMBOL_GPL(deregister_mtd_parser); + +/* + * Do not forget to update 'parse_mtd_partitions()' kerneldoc comment if you + * are changing this array! + */ +static const char * const default_mtd_part_types[] = { + "cmdlinepart", + "ofpart", + NULL +}; + +/** + * parse_mtd_partitions - parse MTD partitions + * @master: the master partition (describes whole MTD device) + * @types: names of partition parsers to try or %NULL + * @pparts: array of partitions found is returned here + * @data: MTD partition parser-specific data + * + * This function tries to find partition on MTD device @master. It uses MTD + * partition parsers, specified in @types. However, if @types is %NULL, then + * the default list of parsers is used. The default list contains only the + * "cmdlinepart" and "ofpart" parsers ATM. + * Note: If there are more then one parser in @types, the kernel only takes the + * partitions parsed out by the first parser. + * + * This function may return: + * o a negative error code in case of failure + * o zero if no partitions were found + * o a positive number of found partitions, in which case on exit @pparts will + * point to an array containing this number of &struct mtd_info objects. + */ +int parse_mtd_partitions(struct mtd_info *master, const char *const *types, + struct mtd_partition **pparts, + struct mtd_part_parser_data *data) +{ + struct mtd_part_parser *parser; + int ret = 0; + + if (!types) + types = default_mtd_part_types; + + for ( ; ret <= 0 && *types; types++) { + parser = get_partition_parser(*types); + if (!parser && !request_module("%s", *types)) + parser = get_partition_parser(*types); + if (!parser) + continue; + ret = (*parser->parse_fn)(master, pparts, data); + put_partition_parser(parser); + if (ret > 0) { + printk(KERN_NOTICE "%d %s partitions found on MTD device %s\n", + ret, parser->name, master->name); + break; + } + } + return ret; +} +#endif + +int mtd_is_partition(const struct mtd_info *mtd) +{ + struct mtd_part *part; + int ispart = 0; + + mutex_lock(&mtd_partitions_mutex); + list_for_each_entry(part, &mtd_partitions, list) + if (&part->mtd == mtd) { + ispart = 1; + break; + } + mutex_unlock(&mtd_partitions_mutex); + + return ispart; +} +EXPORT_SYMBOL_GPL(mtd_is_partition); + +/* Returns the size of the entire flash chip */ +uint64_t mtd_get_device_size(const struct mtd_info *mtd) +{ + if (!mtd_is_partition(mtd)) + return mtd->size; + + return PART(mtd)->master->size; +} +EXPORT_SYMBOL_GPL(mtd_get_device_size); diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c index 2f31fc9..7e1e6ec 100644 --- a/drivers/mtd/nand/fsl_elbc_nand.c +++ b/drivers/mtd/nand/fsl_elbc_nand.c @@ -561,6 +561,7 @@ static void fsl_elbc_read_buf(struct mtd_info *mtd, u8 *buf, int len) len, avail); } +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) /* * Verify buffer against the FCM Controller Data Buffer */ @@ -593,6 +594,7 @@ static int fsl_elbc_verify_buf(struct mtd_info *mtd, ctrl->index += len; return i == len && ctrl->status == LTESR_CC ? 0 : -EIO; } +#endif /* This function is called after Program and Erase Operations to * check for success or failure. @@ -725,7 +727,9 @@ static int fsl_elbc_chip_init(int devnum, u8 *addr) nand->read_byte = fsl_elbc_read_byte; nand->write_buf = fsl_elbc_write_buf; nand->read_buf = fsl_elbc_read_buf; +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) nand->verify_buf = fsl_elbc_verify_buf; +#endif nand->select_chip = fsl_elbc_select_chip; nand->cmdfunc = fsl_elbc_cmdfunc; nand->waitfunc = fsl_elbc_wait; diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 8b453cb..2f04c69 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -684,6 +684,7 @@ static void fsl_ifc_read_buf(struct mtd_info *mtd, u8 *buf, int len) __func__, len, avail); } +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) /* * Verify buffer against the IFC Controller Data Buffer */ @@ -716,6 +717,7 @@ static int fsl_ifc_verify_buf(struct mtd_info *mtd, ctrl->index += len; return i == len && ctrl->status == IFC_NAND_EVTER_STAT_OPC ? 0 : -EIO; } +#endif /* This function is called after Program and Erase Operations to * check for success or failure. @@ -939,7 +941,9 @@ static int fsl_ifc_chip_init(int devnum, u8 *addr) nand->write_buf = fsl_ifc_write_buf; nand->read_buf = fsl_ifc_read_buf; +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) nand->verify_buf = fsl_ifc_verify_buf; +#endif nand->select_chip = fsl_ifc_select_chip; nand->cmdfunc = fsl_ifc_cmdfunc; nand->waitfunc = fsl_ifc_wait; diff --git a/drivers/mtd/nand/fsl_upm.c b/drivers/mtd/nand/fsl_upm.c index 3ae0044..65ce98a 100644 --- a/drivers/mtd/nand/fsl_upm.c +++ b/drivers/mtd/nand/fsl_upm.c @@ -153,6 +153,7 @@ static void upm_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len) buf[i] = in_8(chip->IO_ADDR_R); } +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) static int upm_nand_verify_buf(struct mtd_info *mtd, const u_char *buf, int len) { int i; @@ -165,6 +166,7 @@ static int upm_nand_verify_buf(struct mtd_info *mtd, const u_char *buf, int len) return 0; } +#endif static int nand_dev_ready(struct mtd_info *mtd) { @@ -191,7 +193,9 @@ int fsl_upm_nand_init(struct nand_chip *chip, struct fsl_upm_nand *fun) chip->read_byte = upm_nand_read_byte; chip->read_buf = upm_nand_read_buf; chip->write_buf = upm_nand_write_buf; +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) chip->verify_buf = upm_nand_verify_buf; +#endif if (fun->dev_ready) chip->dev_ready = nand_dev_ready; diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c index d0f3a35..7233bfc 100644 --- a/drivers/mtd/nand/mpc5121_nfc.c +++ b/drivers/mtd/nand/mpc5121_nfc.c @@ -459,6 +459,7 @@ static void mpc5121_nfc_write_buf(struct mtd_info *mtd, mpc5121_nfc_buf_copy(mtd, (u_char *) buf, len, 1); } +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) /* Compare buffer with NAND flash */ static int mpc5121_nfc_verify_buf(struct mtd_info *mtd, const u_char * buf, int len) @@ -479,6 +480,7 @@ static int mpc5121_nfc_verify_buf(struct mtd_info *mtd, return 0; } +#endif /* Read byte from NFC buffers */ static u8 mpc5121_nfc_read_byte(struct mtd_info *mtd) @@ -607,7 +609,9 @@ int board_nand_init(struct nand_chip *chip) chip->read_word = mpc5121_nfc_read_word; chip->read_buf = mpc5121_nfc_read_buf; chip->write_buf = mpc5121_nfc_write_buf; +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) chip->verify_buf = mpc5121_nfc_verify_buf; +#endif chip->select_chip = mpc5121_nfc_select_chip; chip->bbt_options = NAND_BBT_USE_FLASH; chip->ecc.mode = NAND_ECC_SOFT; diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index ed0ca3a..2e5b5b9 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -949,6 +949,8 @@ static void mxc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len) host->col_addr = col; } +#ifdef __UBOOT__ +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) /* * Used by the upper layer to verify the data in NAND Flash * with the data in the buf. @@ -972,6 +974,8 @@ static int mxc_nand_verify_buf(struct mtd_info *mtd, return 0; } +#endif +#endif /* * This function is used by upper layer for select and @@ -1203,7 +1207,11 @@ int board_nand_init(struct nand_chip *this) this->read_word = mxc_nand_read_word; this->write_buf = mxc_nand_write_buf; this->read_buf = mxc_nand_read_buf; +#ifdef __UBOOT__ +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) this->verify_buf = mxc_nand_verify_buf; +#endif +#endif host->regs = (struct mxc_nand_regs __iomem *)CONFIG_MXC_NAND_REGS_BASE; #ifdef MXC_NFC_V3_2 diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 376976d..ae61cca 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -4,7 +4,6 @@ * Overview: * This is the generic MTD driver for NAND flash devices. It should be * capable of working with almost all NAND chips currently available. - * Basic support for AG-AND chips is provided. * * Additional technical information is available on * http://www.linux-mtd.infradead.org/doc/nand.html @@ -22,8 +21,6 @@ * Enable cached programming for 2k page size chips * Check, if mtd->ecctype should be set to MTD_ECC_HW * if we have HW ECC support. - * The AG-AND chips have nice features for speed improvement, - * which are not supported yet. Read / program 4 pages in one go. * BBT table is not serialized, has to be fixed * * This program is free software; you can redistribute it and/or modify @@ -32,10 +29,29 @@ * */ -#include <common.h> - -#define ENOTSUPP 524 /* Operation is not supported */ +#define __UBOOT__ +#ifndef __UBOOT__ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/nand.h> +#include <linux/mtd/nand_ecc.h> +#include <linux/mtd/nand_bch.h> +#include <linux/interrupt.h> +#include <linux/bitops.h> +#include <linux/leds.h> +#include <linux/io.h> +#include <linux/mtd/partitions.h> +#else +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <common.h> #include <malloc.h> #include <watchdog.h> #include <linux/err.h> @@ -44,11 +60,9 @@ #include <linux/mtd/nand.h> #include <linux/mtd/nand_ecc.h> #include <linux/mtd/nand_bch.h> - #ifdef CONFIG_MTD_PARTITIONS #include <linux/mtd/partitions.h> #endif - #include <asm/io.h> #include <asm/errno.h> @@ -63,6 +77,9 @@ #define CONFIG_SYS_NAND_RESET_CNT 200000 #endif +static bool is_module_text_address(unsigned long addr) {return 0;} +#endif + /* Define default oob placement schemes for large and small page devices */ static struct nand_ecclayout nand_oob_8 = { .eccbytes = 3, @@ -107,13 +124,16 @@ static struct nand_ecclayout nand_oob_128 = { .length = 78} } }; -static int nand_get_device(struct nand_chip *chip, struct mtd_info *mtd, - int new_state); +static int nand_get_device(struct mtd_info *mtd, int new_state); static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops); -static int nand_wait(struct mtd_info *mtd, struct nand_chip *this); +/* + * For devices which display every fart in the system on a separate LED. Is + * compiled away when LED support is disabled. + */ +DEFINE_LED_TRIGGER(nand_led_trigger); static int check_offs_len(struct mtd_info *mtd, loff_t ofs, uint64_t len) @@ -122,15 +142,14 @@ static int check_offs_len(struct mtd_info *mtd, int ret = 0; /* Start address must align on block boundary */ - if (ofs & ((1 << chip->phys_erase_shift) - 1)) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Unaligned address\n", __func__); + if (ofs & ((1ULL << chip->phys_erase_shift) - 1)) { + pr_debug("%s: unaligned address\n", __func__); ret = -EINVAL; } /* Length must align on block boundary */ - if (len & ((1 << chip->phys_erase_shift) - 1)) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Length not block aligned\n", - __func__); + if (len & ((1ULL << chip->phys_erase_shift) - 1)) { + pr_debug("%s: length not block aligned\n", __func__); ret = -EINVAL; } @@ -141,30 +160,43 @@ static int check_offs_len(struct mtd_info *mtd, * nand_release_device - [GENERIC] release chip * @mtd: MTD device structure * - * Deselect, release chip lock and wake up anyone waiting on the device. + * Release chip lock and wake up anyone waiting on the device. */ static void nand_release_device(struct mtd_info *mtd) { struct nand_chip *chip = mtd->priv; +#ifndef __UBOOT__ + /* Release the controller and the chip */ + spin_lock(&chip->controller->lock); + chip->controller->active = NULL; + chip->state = FL_READY; + wake_up(&chip->controller->wq); + spin_unlock(&chip->controller->lock); +#else /* De-select the NAND device */ chip->select_chip(mtd, -1); +#endif } /** * nand_read_byte - [DEFAULT] read one byte from the chip * @mtd: MTD device structure * - * Default read function for 8bit buswidth. + * Default read function for 8bit buswidth */ +#ifndef __UBOOT__ +static uint8_t nand_read_byte(struct mtd_info *mtd) +#else uint8_t nand_read_byte(struct mtd_info *mtd) +#endif { struct nand_chip *chip = mtd->priv; return readb(chip->IO_ADDR_R); } /** - * nand_read_byte16 - [DEFAULT] read one byte endianess aware from the chip + * nand_read_byte16 - [DEFAULT] read one byte endianness aware from the chip * nand_read_byte16 - [DEFAULT] read one byte endianness aware from the chip * @mtd: MTD device structure * @@ -213,6 +245,88 @@ static void nand_select_chip(struct mtd_info *mtd, int chipnr) } /** + * nand_write_byte - [DEFAULT] write single byte to chip + * @mtd: MTD device structure + * @byte: value to write + * + * Default function to write a byte to I/O[7:0] + */ +static void nand_write_byte(struct mtd_info *mtd, uint8_t byte) +{ + struct nand_chip *chip = mtd->priv; + + chip->write_buf(mtd, &byte, 1); +} + +/** + * nand_write_byte16 - [DEFAULT] write single byte to a chip with width 16 + * @mtd: MTD device structure + * @byte: value to write + * + * Default function to write a byte to I/O[7:0] on a 16-bit wide chip. + */ +static void nand_write_byte16(struct mtd_info *mtd, uint8_t byte) +{ + struct nand_chip *chip = mtd->priv; + uint16_t word = byte; + + /* + * It's not entirely clear what should happen to I/O[15:8] when writing + * a byte. The ONFi spec (Revision 3.1; 2012-09-19, Section 2.16) reads: + * + * When the host supports a 16-bit bus width, only data is + * transferred at the 16-bit width. All address and command line + * transfers shall use only the lower 8-bits of the data bus. During + * command transfers, the host may place any value on the upper + * 8-bits of the data bus. During address transfers, the host shall + * set the upper 8-bits of the data bus to 00h. + * + * One user of the write_byte callback is nand_onfi_set_features. The + * four parameters are specified to be written to I/O[7:0], but this is + * neither an address nor a command transfer. Let's assume a 0 on the + * upper I/O lines is OK. + */ + chip->write_buf(mtd, (uint8_t *)&word, 2); +} + +#if defined(__UBOOT__) && !defined(CONFIG_BLACKFIN) +static void iowrite8_rep(void *addr, const uint8_t *buf, int len) +{ + int i; + + for (i = 0; i < len; i++) + writeb(buf[i], addr); +} +static void ioread8_rep(void *addr, uint8_t *buf, int len) +{ + int i; + + for (i = 0; i < len; i++) + buf[i] = readb(addr); +} + +static void ioread16_rep(void *addr, void *buf, int len) +{ + int i; + u16 *p = (u16 *) buf; + len >>= 1; + + for (i = 0; i < len; i++) + p[i] = readw(addr); +} + +static void iowrite16_rep(void *addr, void *buf, int len) +{ + int i; + u16 *p = (u16 *) buf; + len >>= 1; + + for (i = 0; i < len; i++) + writew(p[i], addr); +} +#endif + +/** * nand_write_buf - [DEFAULT] write buffer to chip * @mtd: MTD device structure * @buf: data buffer @@ -220,13 +334,15 @@ static void nand_select_chip(struct mtd_info *mtd, int chipnr) * * Default write function for 8bit buswidth. */ +#ifndef __UBOOT__ +static void nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len) +#else void nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len) +#endif { - int i; struct nand_chip *chip = mtd->priv; - for (i = 0; i < len; i++) - writeb(buf[i], chip->IO_ADDR_W); + iowrite8_rep(chip->IO_ADDR_W, buf, len); } /** @@ -237,15 +353,19 @@ void nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len) * * Default read function for 8bit buswidth. */ +#ifndef __UBOOT__ +static void nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len) +#else void nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len) +#endif { - int i; struct nand_chip *chip = mtd->priv; - for (i = 0; i < len; i++) - buf[i] = readb(chip->IO_ADDR_R); + ioread8_rep(chip->IO_ADDR_R, buf, len); } +#ifdef __UBOOT__ +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) /** * nand_verify_buf - [DEFAULT] Verify chip data against buffer * @mtd: MTD device structure @@ -266,14 +386,14 @@ static int nand_verify_buf(struct mtd_info *mtd, const uint8_t *buf, int len) } /** - * nand_write_buf16 - [DEFAULT] write buffer to chip + * nand_verify_buf16 - [DEFAULT] Verify chip data against buffer * @mtd: MTD device structure - * @buf: data buffer - * @len: number of bytes to write + * @buf: buffer containing the data to compare + * @len: number of bytes to compare * - * Default write function for 16bit buswidth. + * Default verify function for 16bit buswidth. */ -void nand_write_buf16(struct mtd_info *mtd, const uint8_t *buf, int len) +static int nand_verify_buf16(struct mtd_info *mtd, const uint8_t *buf, int len) { int i; struct nand_chip *chip = mtd->priv; @@ -281,49 +401,52 @@ void nand_write_buf16(struct mtd_info *mtd, const uint8_t *buf, int len) len >>= 1; for (i = 0; i < len; i++) - writew(p[i], chip->IO_ADDR_W); + if (p[i] != readw(chip->IO_ADDR_R)) + return -EFAULT; + return 0; } +#endif +#endif /** - * nand_read_buf16 - [DEFAULT] read chip data into buffer + * nand_write_buf16 - [DEFAULT] write buffer to chip * @mtd: MTD device structure - * @buf: buffer to store date - * @len: number of bytes to read + * @buf: data buffer + * @len: number of bytes to write * - * Default read function for 16bit buswidth. + * Default write function for 16bit buswidth. */ -void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len) +#ifndef __UBOOT__ +static void nand_write_buf16(struct mtd_info *mtd, const uint8_t *buf, int len) +#else +void nand_write_buf16(struct mtd_info *mtd, const uint8_t *buf, int len) +#endif { - int i; struct nand_chip *chip = mtd->priv; u16 *p = (u16 *) buf; - len >>= 1; - for (i = 0; i < len; i++) - p[i] = readw(chip->IO_ADDR_R); + iowrite16_rep(chip->IO_ADDR_W, p, len >> 1); } /** - * nand_verify_buf16 - [DEFAULT] Verify chip data against buffer + * nand_read_buf16 - [DEFAULT] read chip data into buffer * @mtd: MTD device structure - * @buf: buffer containing the data to compare - * @len: number of bytes to compare + * @buf: buffer to store date + * @len: number of bytes to read * - * Default verify function for 16bit buswidth. + * Default read function for 16bit buswidth. */ -static int nand_verify_buf16(struct mtd_info *mtd, const uint8_t *buf, int len) +#ifndef __UBOOT__ +static void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len) +#else +void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len) +#endif { - int i; struct nand_chip *chip = mtd->priv; u16 *p = (u16 *) buf; - len >>= 1; - - for (i = 0; i < len; i++) - if (p[i] != readw(chip->IO_ADDR_R)) - return -EFAULT; - return 0; + ioread16_rep(chip->IO_ADDR_R, p, len >> 1); } /** @@ -348,7 +471,7 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) if (getchip) { chipnr = (int)(ofs >> chip->chip_shift); - nand_get_device(chip, mtd, FL_READING); + nand_get_device(mtd, FL_READING); /* Select the NAND device */ chip->select_chip(mtd, chipnr); @@ -378,87 +501,97 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) i++; } while (!res && i < 2 && (chip->bbt_options & NAND_BBT_SCAN2NDPAGE)); - if (getchip) + if (getchip) { + chip->select_chip(mtd, -1); nand_release_device(mtd); + } return res; } /** - * nand_default_block_markbad - [DEFAULT] mark a block bad + * nand_default_block_markbad - [DEFAULT] mark a block bad via bad block marker * @mtd: MTD device structure * @ofs: offset from device start * * This is the default implementation, which can be overridden by a hardware - * specific driver. We try operations in the following order, according to our - * bbt_options (NAND_BBT_NO_OOB_BBM and NAND_BBT_USE_FLASH): + * specific driver. It provides the details for writing a bad block marker to a + * block. + */ +static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs) +{ + struct nand_chip *chip = mtd->priv; + struct mtd_oob_ops ops; + uint8_t buf[2] = { 0, 0 }; + int ret = 0, res, i = 0; + + ops.datbuf = NULL; + ops.oobbuf = buf; + ops.ooboffs = chip->badblockpos; + if (chip->options & NAND_BUSWIDTH_16) { + ops.ooboffs &= ~0x01; + ops.len = ops.ooblen = 2; + } else { + ops.len = ops.ooblen = 1; + } + ops.mode = MTD_OPS_PLACE_OOB; + + /* Write to first/last page(s) if necessary */ + if (chip->bbt_options & NAND_BBT_SCANLASTPAGE) + ofs += mtd->erasesize - mtd->writesize; + do { + res = nand_do_write_oob(mtd, ofs, &ops); + if (!ret) + ret = res; + + i++; + ofs += mtd->writesize; + } while ((chip->bbt_options & NAND_BBT_SCAN2NDPAGE) && i < 2); + + return ret; +} + +/** + * nand_block_markbad_lowlevel - mark a block bad + * @mtd: MTD device structure + * @ofs: offset from device start + * + * This function performs the generic NAND bad block marking steps (i.e., bad + * block table(s) and/or marker(s)). We only allow the hardware driver to + * specify how to write bad block markers to OOB (chip->block_markbad). + * + * We try operations in the following order: * (1) erase the affected block, to allow OOB marker to be written cleanly - * (2) update in-memory BBT - * (3) write bad block marker to OOB area of affected block - * (4) update flash-based BBT - * Note that we retain the first error encountered in (3) or (4), finish the + * (2) write bad block marker to OOB area of affected block (unless flag + * NAND_BBT_NO_OOB_BBM is present) + * (3) update the BBT + * Note that we retain the first error encountered in (2) or (3), finish the * procedures, and dump the error in the end. */ -static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs) +static int nand_block_markbad_lowlevel(struct mtd_info *mtd, loff_t ofs) { struct nand_chip *chip = mtd->priv; - uint8_t buf[2] = { 0, 0 }; - int block, res, ret = 0, i = 0; - int write_oob = !(chip->bbt_options & NAND_BBT_NO_OOB_BBM); + int res, ret = 0; - if (write_oob) { + if (!(chip->bbt_options & NAND_BBT_NO_OOB_BBM)) { struct erase_info einfo; /* Attempt erase before marking OOB */ memset(&einfo, 0, sizeof(einfo)); einfo.mtd = mtd; einfo.addr = ofs; - einfo.len = 1 << chip->phys_erase_shift; + einfo.len = 1ULL << chip->phys_erase_shift; nand_erase_nand(mtd, &einfo, 0); - } - - /* Get block number */ - block = (int)(ofs >> chip->bbt_erase_shift); - /* Mark block bad in memory-based BBT */ - if (chip->bbt) - chip->bbt[block >> 2] |= 0x01 << ((block & 0x03) << 1); - - /* Write bad block marker to OOB */ - if (write_oob) { - struct mtd_oob_ops ops; - loff_t wr_ofs = ofs; - - nand_get_device(chip, mtd, FL_WRITING); - - ops.datbuf = NULL; - ops.oobbuf = buf; - ops.ooboffs = chip->badblockpos; - if (chip->options & NAND_BUSWIDTH_16) { - ops.ooboffs &= ~0x01; - ops.len = ops.ooblen = 2; - } else { - ops.len = ops.ooblen = 1; - } - ops.mode = MTD_OPS_PLACE_OOB; - - /* Write to first/last page(s) if necessary */ - if (chip->bbt_options & NAND_BBT_SCANLASTPAGE) - wr_ofs += mtd->erasesize - mtd->writesize; - do { - res = nand_do_write_oob(mtd, wr_ofs, &ops); - if (!ret) - ret = res; - - i++; - wr_ofs += mtd->writesize; - } while ((chip->bbt_options & NAND_BBT_SCAN2NDPAGE) && i < 2); + /* Write bad block marker to OOB */ + nand_get_device(mtd, FL_WRITING); + ret = chip->block_markbad(mtd, ofs); nand_release_device(mtd); } - /* Update flash-based bad block table */ - if (chip->bbt_options & NAND_BBT_USE_FLASH) { - res = nand_update_bbt(mtd, ofs); + /* Mark block bad in BBT */ + if (chip->bbt) { + res = nand_markbad_bbt(mtd, ofs); if (!ret) ret = res; } @@ -504,11 +637,6 @@ static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int getchip, { struct nand_chip *chip = mtd->priv; - if (!(chip->options & NAND_BBT_SCANNED)) { - chip->options |= NAND_BBT_SCANNED; - chip->scan_bbt(mtd); - } - if (!chip->bbt) return chip->block_bad(mtd, ofs, getchip); @@ -516,22 +644,63 @@ static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int getchip, return nand_isbad_bbt(mtd, ofs, allowbbt); } +#ifndef __UBOOT__ +/** + * panic_nand_wait_ready - [GENERIC] Wait for the ready pin after commands. + * @mtd: MTD device structure + * @timeo: Timeout + * + * Helper function for nand_wait_ready used when needing to wait in interrupt + * context. + */ +static void panic_nand_wait_ready(struct mtd_info *mtd, unsigned long timeo) +{ + struct nand_chip *chip = mtd->priv; + int i; + + /* Wait for the device to get ready */ + for (i = 0; i < timeo; i++) { + if (chip->dev_ready(mtd)) + break; + touch_softlockup_watchdog(); + mdelay(1); + } +} +#endif + /* Wait for the ready pin, after a command. The timeout is caught later. */ void nand_wait_ready(struct mtd_info *mtd) { struct nand_chip *chip = mtd->priv; +#ifndef __UBOOT__ + unsigned long timeo = jiffies + msecs_to_jiffies(20); + + /* 400ms timeout */ + if (in_interrupt() || oops_in_progress) + return panic_nand_wait_ready(mtd, 400); + + led_trigger_event(nand_led_trigger, LED_FULL); + /* Wait until command is processed or timeout occurs */ + do { + if (chip->dev_ready(mtd)) + break; + touch_softlockup_watchdog(); + } while (time_before(jiffies, timeo)); + led_trigger_event(nand_led_trigger, LED_OFF); +#else u32 timeo = (CONFIG_SYS_HZ * 20) / 1000; u32 time_start; time_start = get_timer(0); - /* Wait until command is processed or timeout occurs */ while (get_timer(time_start) < timeo) { if (chip->dev_ready) if (chip->dev_ready(mtd)) break; } +#endif } +EXPORT_SYMBOL_GPL(nand_wait_ready); /** * nand_command - [DEFAULT] Send command to NAND device @@ -541,7 +710,7 @@ void nand_wait_ready(struct mtd_info *mtd) * @page_addr: the page address for this command, -1 if none * * Send command to NAND device. This function is used for small page devices - * (256/512 Bytes per page). + * (512 Bytes per page). */ static void nand_command(struct mtd_info *mtd, unsigned int command, int column, int page_addr) @@ -660,8 +829,7 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command, } /* Command latch cycle */ - chip->cmd_ctrl(mtd, command & 0xff, - NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE); + chip->cmd_ctrl(mtd, command, NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE); if (column != -1 || page_addr != -1) { int ctrl = NAND_CTRL_CHANGE | NAND_NCE | NAND_ALE; @@ -701,16 +869,6 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command, case NAND_CMD_SEQIN: case NAND_CMD_RNDIN: case NAND_CMD_STATUS: - case NAND_CMD_DEPLETE1: - return; - - case NAND_CMD_STATUS_ERROR: - case NAND_CMD_STATUS_ERROR0: - case NAND_CMD_STATUS_ERROR1: - case NAND_CMD_STATUS_ERROR2: - case NAND_CMD_STATUS_ERROR3: - /* Read error status commands require only a short delay */ - udelay(chip->chip_delay); return; case NAND_CMD_RESET: @@ -761,18 +919,91 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command, } /** - * nand_get_device - [GENERIC] Get chip for selected access + * panic_nand_get_device - [GENERIC] Get chip for selected access * @chip: the nand chip descriptor * @mtd: MTD device structure * @new_state: the state which is requested * + * Used when in panic, no locks are taken. + */ +static void panic_nand_get_device(struct nand_chip *chip, + struct mtd_info *mtd, int new_state) +{ + /* Hardware controller shared among independent devices */ + chip->controller->active = chip; + chip->state = new_state; +} + +/** + * nand_get_device - [GENERIC] Get chip for selected access + * @mtd: MTD device structure + * @new_state: the state which is requested + * * Get the device and lock it for exclusive access */ static int -nand_get_device(struct nand_chip *chip, struct mtd_info *mtd, int new_state) +nand_get_device(struct mtd_info *mtd, int new_state) { + struct nand_chip *chip = mtd->priv; +#ifndef __UBOOT__ + spinlock_t *lock = &chip->controller->lock; + wait_queue_head_t *wq = &chip->controller->wq; + DECLARE_WAITQUEUE(wait, current); +retry: + spin_lock(lock); + + /* Hardware controller shared among independent devices */ + if (!chip->controller->active) + chip->controller->active = chip; + + if (chip->controller->active == chip && chip->state == FL_READY) { + chip->state = new_state; + spin_unlock(lock); + return 0; + } + if (new_state == FL_PM_SUSPENDED) { + if (chip->controller->active->state == FL_PM_SUSPENDED) { + chip->state = FL_PM_SUSPENDED; + spin_unlock(lock); + return 0; + } + } + set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(wq, &wait); + spin_unlock(lock); + schedule(); + remove_wait_queue(wq, &wait); + goto retry; +#else chip->state = new_state; return 0; +#endif +} + +/** + * panic_nand_wait - [GENERIC] wait until the command is done + * @mtd: MTD device structure + * @chip: NAND chip structure + * @timeo: timeout + * + * Wait for command done. This is a helper function for nand_wait used when + * we are in interrupt context. May happen when in panic and trying to write + * an oops through mtdoops. + */ +static void panic_nand_wait(struct mtd_info *mtd, struct nand_chip *chip, + unsigned long timeo) +{ + int i; + for (i = 0; i < timeo; i++) { + if (chip->dev_ready) { + if (chip->dev_ready(mtd)) + break; + } else { + if (chip->read_byte(mtd) & NAND_STATUS_READY) + break; + } + mdelay(1); + } } /** @@ -786,28 +1017,42 @@ nand_get_device(struct nand_chip *chip, struct mtd_info *mtd, int new_state) */ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip) { - unsigned long timeo; - int state = chip->state; - u32 time_start; - if (state == FL_ERASING) - timeo = (CONFIG_SYS_HZ * 400) / 1000; - else - timeo = (CONFIG_SYS_HZ * 20) / 1000; + int status, state = chip->state; + unsigned long timeo = (state == FL_ERASING ? 400 : 20); - if ((state == FL_ERASING) && (chip->options & NAND_IS_AND)) - chip->cmdfunc(mtd, NAND_CMD_STATUS_MULTI, -1, -1); - else - chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); + led_trigger_event(nand_led_trigger, LED_FULL); - time_start = get_timer(0); + /* + * Apply this short delay always to ensure that we do wait tWB in any + * case on any machine. + */ + ndelay(100); - while (1) { - if (get_timer(time_start) > timeo) { - printf("Timeout!"); - return 0x01; - } + chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); +#ifndef __UBOOT__ + if (in_interrupt() || oops_in_progress) + panic_nand_wait(mtd, chip, timeo); + else { + timeo = jiffies + msecs_to_jiffies(timeo); + while (time_before(jiffies, timeo)) { + if (chip->dev_ready) { + if (chip->dev_ready(mtd)) + break; + } else { + if (chip->read_byte(mtd) & NAND_STATUS_READY) + break; + } + cond_resched(); + } + } +#else + u32 timer = (CONFIG_SYS_HZ * timeo) / 1000; + u32 time_start; + + time_start = get_timer(0); + while (get_timer(time_start) < timer) { if (chip->dev_ready) { if (chip->dev_ready(mtd)) break; @@ -816,16 +1061,177 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip) break; } } +#endif #ifdef PPCHAMELON_NAND_TIMER_HACK time_start = get_timer(0); while (get_timer(time_start) < 10) ; #endif /* PPCHAMELON_NAND_TIMER_HACK */ + led_trigger_event(nand_led_trigger, LED_OFF); + + status = (int)chip->read_byte(mtd); + /* This can happen if in case of timeout or buggy dev_ready */ + WARN_ON(!(status & NAND_STATUS_READY)); + return status; +} + +#ifndef __UBOOT__ +/** + * __nand_unlock - [REPLACEABLE] unlocks specified locked blocks + * @mtd: mtd info + * @ofs: offset to start unlock from + * @len: length to unlock + * @invert: when = 0, unlock the range of blocks within the lower and + * upper boundary address + * when = 1, unlock the range of blocks outside the boundaries + * of the lower and upper boundary address + * + * Returs unlock status. + */ +static int __nand_unlock(struct mtd_info *mtd, loff_t ofs, + uint64_t len, int invert) +{ + int ret = 0; + int status, page; + struct nand_chip *chip = mtd->priv; + + /* Submit address of first page to unlock */ + page = ofs >> chip->page_shift; + chip->cmdfunc(mtd, NAND_CMD_UNLOCK1, -1, page & chip->pagemask); + + /* Submit address of last page to unlock */ + page = (ofs + len) >> chip->page_shift; + chip->cmdfunc(mtd, NAND_CMD_UNLOCK2, -1, + (page | invert) & chip->pagemask); + + /* Call wait ready function */ + status = chip->waitfunc(mtd, chip); + /* See if device thinks it succeeded */ + if (status & NAND_STATUS_FAIL) { + pr_debug("%s: error status = 0x%08x\n", + __func__, status); + ret = -EIO; + } - return (int)chip->read_byte(mtd); + return ret; } /** + * nand_unlock - [REPLACEABLE] unlocks specified locked blocks + * @mtd: mtd info + * @ofs: offset to start unlock from + * @len: length to unlock + * + * Returns unlock status. + */ +int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + int ret = 0; + int chipnr; + struct nand_chip *chip = mtd->priv; + + pr_debug("%s: start = 0x%012llx, len = %llu\n", + __func__, (unsigned long long)ofs, len); + + if (check_offs_len(mtd, ofs, len)) + ret = -EINVAL; + + /* Align to last block address if size addresses end of the device */ + if (ofs + len == mtd->size) + len -= mtd->erasesize; + + nand_get_device(mtd, FL_UNLOCKING); + + /* Shift to get chip number */ + chipnr = ofs >> chip->chip_shift; + + chip->select_chip(mtd, chipnr); + + /* Check, if it is write protected */ + if (nand_check_wp(mtd)) { + pr_debug("%s: device is write protected!\n", + __func__); + ret = -EIO; + goto out; + } + + ret = __nand_unlock(mtd, ofs, len, 0); + +out: + chip->select_chip(mtd, -1); + nand_release_device(mtd); + + return ret; +} +EXPORT_SYMBOL(nand_unlock); + +/** + * nand_lock - [REPLACEABLE] locks all blocks present in the device + * @mtd: mtd info + * @ofs: offset to start unlock from + * @len: length to unlock + * + * This feature is not supported in many NAND parts. 'Micron' NAND parts do + * have this feature, but it allows only to lock all blocks, not for specified + * range for block. Implementing 'lock' feature by making use of 'unlock', for + * now. + * + * Returns lock status. + */ +int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + int ret = 0; + int chipnr, status, page; + struct nand_chip *chip = mtd->priv; + + pr_debug("%s: start = 0x%012llx, len = %llu\n", + __func__, (unsigned long long)ofs, len); + + if (check_offs_len(mtd, ofs, len)) + ret = -EINVAL; + + nand_get_device(mtd, FL_LOCKING); + + /* Shift to get chip number */ + chipnr = ofs >> chip->chip_shift; + + chip->select_chip(mtd, chipnr); + + /* Check, if it is write protected */ + if (nand_check_wp(mtd)) { + pr_debug("%s: device is write protected!\n", + __func__); + status = MTD_ERASE_FAILED; + ret = -EIO; + goto out; + } + + /* Submit address of first page to lock */ + page = ofs >> chip->page_shift; + chip->cmdfunc(mtd, NAND_CMD_LOCK, -1, page & chip->pagemask); + + /* Call wait ready function */ + status = chip->waitfunc(mtd, chip); + /* See if device thinks it succeeded */ + if (status & NAND_STATUS_FAIL) { + pr_debug("%s: error status = 0x%08x\n", + __func__, status); + ret = -EIO; + goto out; + } + + ret = __nand_unlock(mtd, ofs, len, 0x1); + +out: + chip->select_chip(mtd, -1); + nand_release_device(mtd); + + return ret; +} +EXPORT_SYMBOL(nand_lock); +#endif + +/** * nand_read_page_raw - [INTERN] read raw page data without ecc * @mtd: mtd info structure * @chip: nand chip info structure @@ -906,6 +1312,7 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *ecc_calc = chip->buffers->ecccalc; uint8_t *ecc_code = chip->buffers->ecccode; uint32_t *eccpos = chip->ecc.layout->eccpos; + unsigned int max_bitflips = 0; chip->ecc.read_page_raw(mtd, chip, buf, 1, page); @@ -922,16 +1329,18 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, int stat; stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]); - if (stat < 0) + if (stat < 0) { mtd->ecc_stats.failed++; - else + } else { mtd->ecc_stats.corrected += stat; + max_bitflips = max_t(unsigned int, max_bitflips, stat); + } } - return 0; + return max_bitflips; } /** - * nand_read_subpage - [REPLACEABLE] software ECC based sub-page read function + * nand_read_subpage - [REPLACEABLE] ECC based sub-page read function * @mtd: mtd info structure * @chip: nand chip info structure * @data_offs: offset of requested data within the page @@ -948,6 +1357,7 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, int datafrag_len, eccfrag_len, aligned_len, aligned_pos; int busw = (chip->options & NAND_BUSWIDTH_16) ? 2 : 1; int index = 0; + unsigned int max_bitflips = 0; /* Column address within the page aligned to ECC size (256bytes) */ start_step = data_offs / chip->ecc.size; @@ -1012,12 +1422,14 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, stat = chip->ecc.correct(mtd, p, &chip->buffers->ecccode[i], &chip->buffers->ecccalc[i]); - if (stat < 0) + if (stat < 0) { mtd->ecc_stats.failed++; - else + } else { mtd->ecc_stats.corrected += stat; + max_bitflips = max_t(unsigned int, max_bitflips, stat); + } } - return 0; + return max_bitflips; } /** @@ -1040,6 +1452,7 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *ecc_calc = chip->buffers->ecccalc; uint8_t *ecc_code = chip->buffers->ecccode; uint32_t *eccpos = chip->ecc.layout->eccpos; + unsigned int max_bitflips = 0; for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { chip->ecc.hwctl(mtd, NAND_ECC_READ); @@ -1058,12 +1471,14 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, int stat; stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]); - if (stat < 0) + if (stat < 0) { mtd->ecc_stats.failed++; - else + } else { mtd->ecc_stats.corrected += stat; + max_bitflips = max_t(unsigned int, max_bitflips, stat); + } } - return 0; + return max_bitflips; } /** @@ -1090,6 +1505,7 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd, uint8_t *ecc_code = chip->buffers->ecccode; uint32_t *eccpos = chip->ecc.layout->eccpos; uint8_t *ecc_calc = chip->buffers->ecccalc; + unsigned int max_bitflips = 0; /* Read the OOB area first */ chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page); @@ -1107,12 +1523,14 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd, chip->ecc.calculate(mtd, p, &ecc_calc[i]); stat = chip->ecc.correct(mtd, p, &ecc_code[i], NULL); - if (stat < 0) + if (stat < 0) { mtd->ecc_stats.failed++; - else + } else { mtd->ecc_stats.corrected += stat; + max_bitflips = max_t(unsigned int, max_bitflips, stat); + } } - return 0; + return max_bitflips; } /** @@ -1134,6 +1552,7 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int eccsteps = chip->ecc.steps; uint8_t *p = buf; uint8_t *oob = chip->oob_poi; + unsigned int max_bitflips = 0; for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { int stat; @@ -1150,10 +1569,12 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip, chip->read_buf(mtd, oob, eccbytes); stat = chip->ecc.correct(mtd, p, oob, NULL); - if (stat < 0) + if (stat < 0) { mtd->ecc_stats.failed++; - else + } else { mtd->ecc_stats.corrected += stat; + max_bitflips = max_t(unsigned int, max_bitflips, stat); + } oob += eccbytes; @@ -1168,7 +1589,7 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip, if (i) chip->read_buf(mtd, oob, i); - return 0; + return max_bitflips; } /** @@ -1220,6 +1641,30 @@ static uint8_t *nand_transfer_oob(struct nand_chip *chip, uint8_t *oob, } /** + * nand_setup_read_retry - [INTERN] Set the READ RETRY mode + * @mtd: MTD device structure + * @retry_mode: the retry mode to use + * + * Some vendors supply a special command to shift the Vt threshold, to be used + * when there are too many bitflips in a page (i.e., ECC error). After setting + * a new threshold, the host should retry reading the page. + */ +static int nand_setup_read_retry(struct mtd_info *mtd, int retry_mode) +{ + struct nand_chip *chip = mtd->priv; + + pr_debug("setting READ RETRY mode %d\n", retry_mode); + + if (retry_mode >= chip->read_retries) + return -EINVAL; + + if (!chip->setup_read_retry) + return -EOPNOTSUPP; + + return chip->setup_read_retry(mtd, retry_mode); +} + +/** * nand_do_read_ops - [INTERN] Read data with ECC * @mtd: MTD device structure * @from: offset to read from @@ -1232,7 +1677,6 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, { int chipnr, page, realpage, col, bytes, aligned, oob_required; struct nand_chip *chip = mtd->priv; - struct mtd_ecc_stats stats; int ret = 0; uint32_t readlen = ops->len; uint32_t oobreadlen = ops->ooblen; @@ -1241,8 +1685,8 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, uint8_t *bufpoi, *oob, *buf; unsigned int max_bitflips = 0; - - stats = mtd->ecc_stats; + int retry_mode = 0; + bool ecc_fail = false; chipnr = (int)(from >> chip->chip_shift); chip->select_chip(mtd, chipnr); @@ -1257,8 +1701,9 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, oob_required = oob ? 1 : 0; while (1) { - WATCHDOG_RESET(); + unsigned int ecc_failures = mtd->ecc_stats.failed; + WATCHDOG_RESET(); bytes = min(mtd->writesize - col, readlen); aligned = (bytes == mtd->writesize); @@ -1266,6 +1711,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, if (realpage != chip->pagebuf || oob) { bufpoi = aligned ? buf : chip->buffers->databuf; +read_retry: chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page); /* @@ -1277,7 +1723,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, oob_required, page); else if (!aligned && NAND_HAS_SUBPAGE_READ(chip) && - !oob) + !oob) ret = chip->ecc.read_subpage(mtd, chip, col, bytes, bufpoi); else @@ -1295,7 +1741,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, /* Transfer not aligned data */ if (!aligned) { if (!NAND_HAS_SUBPAGE_READ(chip) && !oob && - !(mtd->ecc_stats.failed - stats.failed) && + !(mtd->ecc_stats.failed - ecc_failures) && (ops->mode != MTD_OPS_RAW)) { chip->pagebuf = realpage; chip->pagebuf_bitflips = ret; @@ -1306,8 +1752,6 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, memcpy(buf, chip->buffers->databuf + col, bytes); } - buf += bytes; - if (unlikely(oob)) { int toread = min(oobreadlen, max_oobsize); @@ -1317,6 +1761,33 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, oobreadlen -= toread; } } + + if (chip->options & NAND_NEED_READRDY) { + /* Apply delay or wait for ready/busy pin */ + if (!chip->dev_ready) + udelay(chip->chip_delay); + else + nand_wait_ready(mtd); + } + + if (mtd->ecc_stats.failed - ecc_failures) { + if (retry_mode + 1 < chip->read_retries) { + retry_mode++; + ret = nand_setup_read_retry(mtd, + retry_mode); + if (ret < 0) + break; + + /* Reset failures; retry */ + mtd->ecc_stats.failed = ecc_failures; + goto read_retry; + } else { + /* No more retry modes; real failure */ + ecc_fail = true; + } + } + + buf += bytes; } else { memcpy(buf, chip->buffers->databuf + col, bytes); buf += bytes; @@ -1326,6 +1797,14 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, readlen -= bytes; + /* Reset to retry mode 0 */ + if (retry_mode) { + ret = nand_setup_read_retry(mtd, 0); + if (ret < 0) + break; + retry_mode = 0; + } + if (!readlen) break; @@ -1342,15 +1821,16 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, chip->select_chip(mtd, chipnr); } } + chip->select_chip(mtd, -1); ops->retlen = ops->len - (size_t) readlen; if (oob) ops->oobretlen = ops->ooblen - oobreadlen; - if (ret) + if (ret < 0) return ret; - if (mtd->ecc_stats.failed - stats.failed) + if (ecc_fail) return -EBADMSG; return max_bitflips; @@ -1369,11 +1849,10 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, static int nand_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, uint8_t *buf) { - struct nand_chip *chip = mtd->priv; struct mtd_oob_ops ops; int ret; - nand_get_device(chip, mtd, FL_READING); + nand_get_device(mtd, FL_READING); ops.len = len; ops.datbuf = buf; ops.oobbuf = NULL; @@ -1537,7 +2016,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, uint8_t *buf = ops->oobbuf; int ret = 0; - MTDDEBUG(MTD_DEBUG_LEVEL3, "%s: from = 0x%08Lx, len = %i\n", + pr_debug("%s: from = 0x%08Lx, len = %i\n", __func__, (unsigned long long)from, readlen); stats = mtd->ecc_stats; @@ -1548,8 +2027,8 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, len = mtd->oobsize; if (unlikely(ops->ooboffs >= len)) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt to start read " - "outside oob\n", __func__); + pr_debug("%s: attempt to start read outside oob\n", + __func__); return -EINVAL; } @@ -1557,8 +2036,8 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, if (unlikely(from >= mtd->size || ops->ooboffs + readlen > ((mtd->size >> chip->page_shift) - (from >> chip->page_shift)) * len)) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt read beyond end " - "of device\n", __func__); + pr_debug("%s: attempt to read beyond end of device\n", + __func__); return -EINVAL; } @@ -1571,6 +2050,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, while (1) { WATCHDOG_RESET(); + if (ops->mode == MTD_OPS_RAW) ret = chip->ecc.read_oob_raw(mtd, chip, page); else @@ -1582,6 +2062,14 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, len = min(len, readlen); buf = nand_transfer_oob(chip, buf, ops, len); + if (chip->options & NAND_NEED_READRDY) { + /* Apply delay or wait for ready/busy pin */ + if (!chip->dev_ready) + udelay(chip->chip_delay); + else + nand_wait_ready(mtd); + } + readlen -= len; if (!readlen) break; @@ -1597,6 +2085,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, chip->select_chip(mtd, chipnr); } } + chip->select_chip(mtd, -1); ops->oobretlen = ops->ooblen - readlen; @@ -1620,19 +2109,18 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, static int nand_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { - struct nand_chip *chip = mtd->priv; int ret = -ENOTSUPP; ops->retlen = 0; /* Do not allow reads past end of device */ if (ops->datbuf && (from + ops->len) > mtd->size) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt read " - "beyond end of device\n", __func__); + pr_debug("%s: attempt to read beyond end of device\n", + __func__); return -EINVAL; } - nand_get_device(chip, mtd, FL_READING); + nand_get_device(mtd, FL_READING); switch (ops->mode) { case MTD_OPS_PLACE_OOB: @@ -1774,6 +2262,68 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, return 0; } + +/** + * nand_write_subpage_hwecc - [REPLACABLE] hardware ECC based subpage write + * @mtd: mtd info structure + * @chip: nand chip info structure + * @offset: column address of subpage within the page + * @data_len: data length + * @buf: data buffer + * @oob_required: must write chip->oob_poi to OOB + */ +static int nand_write_subpage_hwecc(struct mtd_info *mtd, + struct nand_chip *chip, uint32_t offset, + uint32_t data_len, const uint8_t *buf, + int oob_required) +{ + uint8_t *oob_buf = chip->oob_poi; + uint8_t *ecc_calc = chip->buffers->ecccalc; + int ecc_size = chip->ecc.size; + int ecc_bytes = chip->ecc.bytes; + int ecc_steps = chip->ecc.steps; + uint32_t *eccpos = chip->ecc.layout->eccpos; + uint32_t start_step = offset / ecc_size; + uint32_t end_step = (offset + data_len - 1) / ecc_size; + int oob_bytes = mtd->oobsize / ecc_steps; + int step, i; + + for (step = 0; step < ecc_steps; step++) { + /* configure controller for WRITE access */ + chip->ecc.hwctl(mtd, NAND_ECC_WRITE); + + /* write data (untouched subpages already masked by 0xFF) */ + chip->write_buf(mtd, buf, ecc_size); + + /* mask ECC of un-touched subpages by padding 0xFF */ + if ((step < start_step) || (step > end_step)) + memset(ecc_calc, 0xff, ecc_bytes); + else + chip->ecc.calculate(mtd, buf, ecc_calc); + + /* mask OOB of un-touched subpages by padding 0xFF */ + /* if oob_required, preserve OOB metadata of written subpage */ + if (!oob_required || (step < start_step) || (step > end_step)) + memset(oob_buf, 0xff, oob_bytes); + + buf += ecc_size; + ecc_calc += ecc_bytes; + oob_buf += oob_bytes; + } + + /* copy calculated ECC for whole page to chip->buffer->oob */ + /* this include masked-value(0xFF) for unwritten subpages */ + ecc_calc = chip->buffers->ecccalc; + for (i = 0; i < chip->ecc.total; i++) + chip->oob_poi[eccpos[i]] = ecc_calc[i]; + + /* write OOB buffer to NAND device */ + chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); + + return 0; +} + + /** * nand_write_page_syndrome - [REPLACEABLE] hardware ECC syndrome based page write * @mtd: mtd info structure @@ -1826,6 +2376,8 @@ static int nand_write_page_syndrome(struct mtd_info *mtd, * nand_write_page - [REPLACEABLE] write one page * @mtd: MTD device structure * @chip: NAND chip descriptor + * @offset: address offset within the page + * @data_len: length of actual data to be written * @buf: the data to write * @oob_required: must write chip->oob_poi to OOB * @page: page number to write @@ -1833,15 +2385,25 @@ static int nand_write_page_syndrome(struct mtd_info *mtd, * @raw: use _raw version of write_page */ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required, int page, - int cached, int raw) + uint32_t offset, int data_len, const uint8_t *buf, + int oob_required, int page, int cached, int raw) { - int status; + int status, subpage; + + if (!(chip->options & NAND_NO_SUBPAGE_WRITE) && + chip->ecc.write_subpage) + subpage = offset || (data_len < mtd->writesize); + else + subpage = 0; chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page); if (unlikely(raw)) - status = chip->ecc.write_page_raw(mtd, chip, buf, oob_required); + status = chip->ecc.write_page_raw(mtd, chip, buf, + oob_required); + else if (subpage) + status = chip->ecc.write_subpage(mtd, chip, offset, data_len, + buf, oob_required); else status = chip->ecc.write_page(mtd, chip, buf, oob_required); @@ -1854,7 +2416,7 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, */ cached = 0; - if (!cached || !(chip->options & NAND_CACHEPRG)) { + if (!cached || !NAND_HAS_CACHEPROG(chip)) { chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); status = chip->waitfunc(mtd, chip); @@ -1873,7 +2435,9 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, status = chip->waitfunc(mtd, chip); } -#ifdef CONFIG_MTD_NAND_VERIFY_WRITE + +#ifdef __UBOOT__ +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) /* Send command to read back the data */ chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); @@ -1883,6 +2447,8 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, /* Make sure the next page prog is preceded by a status read */ chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); #endif +#endif + return 0; } @@ -1965,26 +2531,34 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, uint8_t *oob = ops->oobbuf; uint8_t *buf = ops->datbuf; - int ret, subpage; + int ret; int oob_required = oob ? 1 : 0; ops->retlen = 0; if (!writelen) return 0; - column = to & (mtd->writesize - 1); - subpage = column || (writelen & (mtd->writesize - 1)); - - if (subpage && oob) +#ifndef __UBOOT__ + /* Reject writes, which are not page aligned */ + if (NOTALIGNED(to) || NOTALIGNED(ops->len)) { +#else + /* Reject writes, which are not page aligned */ + if (NOTALIGNED(to)) { +#endif + pr_notice("%s: attempt to write non page aligned data\n", + __func__); return -EINVAL; + } + + column = to & (mtd->writesize - 1); chipnr = (int)(to >> chip->chip_shift); chip->select_chip(mtd, chipnr); /* Check, if it is write protected */ if (nand_check_wp(mtd)) { - printk (KERN_NOTICE "nand_do_write_ops: Device is write protected\n"); - return -EIO; + ret = -EIO; + goto err_out; } realpage = (int)(to >> chip->page_shift); @@ -1997,18 +2571,19 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, chip->pagebuf = -1; /* Don't allow multipage oob writes with offset */ - if (oob && ops->ooboffs && (ops->ooboffs + ops->ooblen > oobmaxlen)) - return -EINVAL; + if (oob && ops->ooboffs && (ops->ooboffs + ops->ooblen > oobmaxlen)) { + ret = -EINVAL; + goto err_out; + } while (1) { - WATCHDOG_RESET(); - int bytes = mtd->writesize; int cached = writelen > bytes && page != blockmask; uint8_t *wbuf = buf; + WATCHDOG_RESET(); /* Partial page write? */ - if (unlikely(column || writelen < mtd->writesize)) { + if (unlikely(column || writelen < (mtd->writesize - 1))) { cached = 0; bytes = min_t(int, bytes - column, (int) writelen); chip->pagebuf = -1; @@ -2025,9 +2600,9 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, /* We still need to erase leftover OOB data */ memset(chip->oob_poi, 0xff, mtd->oobsize); } - - ret = chip->write_page(mtd, chip, wbuf, oob_required, page, - cached, (ops->mode == MTD_OPS_RAW)); + ret = chip->write_page(mtd, chip, column, bytes, wbuf, + oob_required, page, cached, + (ops->mode == MTD_OPS_RAW)); if (ret) break; @@ -2051,6 +2626,44 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, ops->retlen = ops->len - writelen; if (unlikely(oob)) ops->oobretlen = ops->ooblen; + +err_out: + chip->select_chip(mtd, -1); + return ret; +} + +/** + * panic_nand_write - [MTD Interface] NAND write with ECC + * @mtd: MTD device structure + * @to: offset to write to + * @len: number of bytes to write + * @retlen: pointer to variable to store the number of written bytes + * @buf: the data to write + * + * NAND write with ECC. Used when performing writes in interrupt context, this + * may for example be called by mtdoops when writing an oops while in panic. + */ +static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const uint8_t *buf) +{ + struct nand_chip *chip = mtd->priv; + struct mtd_oob_ops ops; + int ret; + + /* Wait for the device to get ready */ + panic_nand_wait(mtd, chip, 400); + + /* Grab the device */ + panic_nand_get_device(chip, mtd, FL_WRITING); + + ops.len = len; + ops.datbuf = (uint8_t *)buf; + ops.oobbuf = NULL; + ops.mode = MTD_OPS_PLACE_OOB; + + ret = nand_do_write_ops(mtd, to, &ops); + + *retlen = ops.retlen; return ret; } @@ -2067,11 +2680,10 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, static int nand_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const uint8_t *buf) { - struct nand_chip *chip = mtd->priv; struct mtd_oob_ops ops; int ret; - nand_get_device(chip, mtd, FL_WRITING); + nand_get_device(mtd, FL_WRITING); ops.len = len; ops.datbuf = (uint8_t *)buf; ops.oobbuf = NULL; @@ -2096,7 +2708,7 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, int chipnr, page, status, len; struct nand_chip *chip = mtd->priv; - MTDDEBUG(MTD_DEBUG_LEVEL3, "%s: to = 0x%08x, len = %i\n", + pr_debug("%s: to = 0x%08x, len = %i\n", __func__, (unsigned int)to, (int)ops->ooblen); if (ops->mode == MTD_OPS_AUTO_OOB) @@ -2106,14 +2718,14 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, /* Do not allow write past end of page */ if ((ops->ooboffs + ops->ooblen) > len) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt to write " - "past end of page\n", __func__); + pr_debug("%s: attempt to write past end of page\n", + __func__); return -EINVAL; } if (unlikely(ops->ooboffs >= len)) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt to start " - "write outside oob\n", __func__); + pr_debug("%s: attempt to start write outside oob\n", + __func__); return -EINVAL; } @@ -2122,8 +2734,8 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, ops->ooboffs + ops->ooblen > ((mtd->size >> chip->page_shift) - (to >> chip->page_shift)) * len)) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt write beyond " - "end of device\n", __func__); + pr_debug("%s: attempt to write beyond end of device\n", + __func__); return -EINVAL; } @@ -2142,8 +2754,10 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); /* Check, if it is write protected */ - if (nand_check_wp(mtd)) + if (nand_check_wp(mtd)) { + chip->select_chip(mtd, -1); return -EROFS; + } /* Invalidate the page cache, if we write to the cached page */ if (page == chip->pagebuf) @@ -2156,6 +2770,8 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, else status = chip->ecc.write_oob(mtd, chip, page & chip->pagemask); + chip->select_chip(mtd, -1); + if (status) return status; @@ -2173,19 +2789,18 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, static int nand_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) { - struct nand_chip *chip = mtd->priv; int ret = -ENOTSUPP; ops->retlen = 0; /* Do not allow writes past end of device */ if (ops->datbuf && (to + ops->len) > mtd->size) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt write beyond " - "end of device\n", __func__); + pr_debug("%s: attempt to write beyond end of device\n", + __func__); return -EINVAL; } - nand_get_device(chip, mtd, FL_WRITING); + nand_get_device(mtd, FL_WRITING); switch (ops->mode) { case MTD_OPS_PLACE_OOB: @@ -2223,24 +2838,6 @@ static void single_erase_cmd(struct mtd_info *mtd, int page) } /** - * multi_erase_cmd - [GENERIC] AND specific block erase command function - * @mtd: MTD device structure - * @page: the page address of the block which will be erased - * - * AND multi block erase command function. Erase 4 consecutive blocks. - */ -static void multi_erase_cmd(struct mtd_info *mtd, int page) -{ - struct nand_chip *chip = mtd->priv; - /* Send commands to erase a block */ - chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page++); - chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page++); - chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page++); - chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page); - chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1); -} - -/** * nand_erase - [MTD Interface] erase block(s) * @mtd: MTD device structure * @instr: erase instruction @@ -2252,7 +2849,6 @@ static int nand_erase(struct mtd_info *mtd, struct erase_info *instr) return nand_erase_nand(mtd, instr, 0); } -#define BBT_PAGE_MASK 0xffffff3f /** * nand_erase_nand - [INTERN] erase block(s) * @mtd: MTD device structure @@ -2266,19 +2862,17 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, { int page, status, pages_per_block, ret, chipnr; struct nand_chip *chip = mtd->priv; - loff_t rewrite_bbt[CONFIG_SYS_NAND_MAX_CHIPS] = {0}; - unsigned int bbt_masked_page = 0xffffffff; loff_t len; - MTDDEBUG(MTD_DEBUG_LEVEL3, "%s: start = 0x%012llx, len = %llu\n", - __func__, (unsigned long long)instr->addr, - (unsigned long long)instr->len); + pr_debug("%s: start = 0x%012llx, len = %llu\n", + __func__, (unsigned long long)instr->addr, + (unsigned long long)instr->len); if (check_offs_len(mtd, instr->addr, instr->len)) return -EINVAL; /* Grab the lock and see if the device is available */ - nand_get_device(chip, mtd, FL_ERASING); + nand_get_device(mtd, FL_ERASING); /* Shift to get first page */ page = (int)(instr->addr >> chip->page_shift); @@ -2292,21 +2886,12 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, /* Check, if it is write protected */ if (nand_check_wp(mtd)) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Device is write protected!!!\n", - __func__); + pr_debug("%s: device is write protected!\n", + __func__); instr->state = MTD_ERASE_FAILED; goto erase_exit; } - /* - * If BBT requires refresh, set the BBT page mask to see if the BBT - * should be rewritten. Otherwise the mask is set to 0xffffffff which - * can not be matched. This is also done when the bbt is actually - * erased to avoid recursive updates. - */ - if (chip->options & BBT_AUTO_REFRESH && !allowbbt) - bbt_masked_page = chip->bbt_td->pages[chipnr] & BBT_PAGE_MASK; - /* Loop through the pages */ len = instr->len; @@ -2314,11 +2899,12 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, while (len) { WATCHDOG_RESET(); + /* Check if we have a bad block, we do not erase bad blocks! */ - if (!instr->scrub && nand_block_checkbad(mtd, ((loff_t) page) << + if (nand_block_checkbad(mtd, ((loff_t) page) << chip->page_shift, 0, allowbbt)) { pr_warn("%s: attempt to erase a bad block at page 0x%08x\n", - __func__, page); + __func__, page); instr->state = MTD_ERASE_FAILED; goto erase_exit; } @@ -2345,25 +2931,16 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, /* See if block erase succeeded */ if (status & NAND_STATUS_FAIL) { - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: Failed erase, " - "page 0x%08x\n", __func__, page); + pr_debug("%s: failed erase, page 0x%08x\n", + __func__, page); instr->state = MTD_ERASE_FAILED; instr->fail_addr = ((loff_t)page << chip->page_shift); goto erase_exit; } - /* - * If BBT requires refresh, set the BBT rewrite flag to the - * page being erased. - */ - if (bbt_masked_page != 0xffffffff && - (page & BBT_PAGE_MASK) == bbt_masked_page) - rewrite_bbt[chipnr] = - ((loff_t)page << chip->page_shift); - /* Increment page address and decrement length */ - len -= (1 << chip->phys_erase_shift); + len -= (1ULL << chip->phys_erase_shift); page += pages_per_block; /* Check, if we cross a chip boundary */ @@ -2371,15 +2948,6 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, chipnr++; chip->select_chip(mtd, -1); chip->select_chip(mtd, chipnr); - - /* - * If BBT requires refresh and BBT-PERCHIP, set the BBT - * page mask to see if this BBT should be rewritten. - */ - if (bbt_masked_page != 0xffffffff && - (chip->bbt_td->options & NAND_BBT_PERCHIP)) - bbt_masked_page = chip->bbt_td->pages[chipnr] & - BBT_PAGE_MASK; } } instr->state = MTD_ERASE_DONE; @@ -2389,29 +2957,13 @@ erase_exit: ret = instr->state == MTD_ERASE_DONE ? 0 : -EIO; /* Deselect and wake up anyone waiting on the device */ + chip->select_chip(mtd, -1); nand_release_device(mtd); /* Do call back function */ if (!ret) mtd_erase_callback(instr); - /* - * If BBT requires refresh and erase was successful, rewrite any - * selected bad block tables. - */ - if (bbt_masked_page == 0xffffffff || ret) - return ret; - - for (chipnr = 0; chipnr < chip->numchips; chipnr++) { - if (!rewrite_bbt[chipnr]) - continue; - /* Update the BBT for chip */ - MTDDEBUG(MTD_DEBUG_LEVEL0, "%s: nand_update_bbt " - "(%d:0x%0llx 0x%0x)\n", __func__, chipnr, - rewrite_bbt[chipnr], chip->bbt_td->pages[chipnr]); - nand_update_bbt(mtd, rewrite_bbt[chipnr]); - } - /* Return more or less happy */ return ret; } @@ -2424,12 +2976,10 @@ erase_exit: */ static void nand_sync(struct mtd_info *mtd) { - struct nand_chip *chip = mtd->priv; - - MTDDEBUG(MTD_DEBUG_LEVEL3, "%s: called\n", __func__); + pr_debug("%s: called\n", __func__); /* Grab the lock and see if the device is available */ - nand_get_device(chip, mtd, FL_SYNCING); + nand_get_device(mtd, FL_SYNCING); /* Release it and go back */ nand_release_device(mtd); } @@ -2451,7 +3001,6 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs) */ static int nand_block_markbad(struct mtd_info *mtd, loff_t ofs) { - struct nand_chip *chip = mtd->priv; int ret; ret = nand_block_isbad(mtd, ofs); @@ -2462,10 +3011,10 @@ static int nand_block_markbad(struct mtd_info *mtd, loff_t ofs) return ret; } - return chip->block_markbad(mtd, ofs); + return nand_block_markbad_lowlevel(mtd, ofs); } - /** +/** * nand_onfi_set_features- [REPLACEABLE] set features for ONFI nand * @mtd: MTD device structure * @chip: nand chip info structure @@ -2476,12 +3025,19 @@ static int nand_onfi_set_features(struct mtd_info *mtd, struct nand_chip *chip, int addr, uint8_t *subfeature_param) { int status; + int i; - if (!chip->onfi_version) +#ifdef CONFIG_SYS_NAND_ONFI_DETECTION + if (!chip->onfi_version || + !(le16_to_cpu(chip->onfi_params.opt_cmd) + & ONFI_OPT_CMD_SET_GET_FEATURES)) return -EINVAL; +#endif chip->cmdfunc(mtd, NAND_CMD_SET_FEATURES, addr, -1); - chip->write_buf(mtd, subfeature_param, ONFI_SUBFEATURE_PARAM_LEN); + for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i) + chip->write_byte(mtd, subfeature_param[i]); + status = chip->waitfunc(mtd, chip); if (status & NAND_STATUS_FAIL) return -EIO; @@ -2498,17 +3054,50 @@ static int nand_onfi_set_features(struct mtd_info *mtd, struct nand_chip *chip, static int nand_onfi_get_features(struct mtd_info *mtd, struct nand_chip *chip, int addr, uint8_t *subfeature_param) { - if (!chip->onfi_version) + int i; + +#ifdef CONFIG_SYS_NAND_ONFI_DETECTION + if (!chip->onfi_version || + !(le16_to_cpu(chip->onfi_params.opt_cmd) + & ONFI_OPT_CMD_SET_GET_FEATURES)) return -EINVAL; +#endif /* clear the sub feature parameters */ memset(subfeature_param, 0, ONFI_SUBFEATURE_PARAM_LEN); chip->cmdfunc(mtd, NAND_CMD_GET_FEATURES, addr, -1); - chip->read_buf(mtd, subfeature_param, ONFI_SUBFEATURE_PARAM_LEN); + for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i) + *subfeature_param++ = chip->read_byte(mtd); return 0; } +#ifndef __UBOOT__ +/** + * nand_suspend - [MTD Interface] Suspend the NAND flash + * @mtd: MTD device structure + */ +static int nand_suspend(struct mtd_info *mtd) +{ + return nand_get_device(mtd, FL_PM_SUSPENDED); +} + +/** + * nand_resume - [MTD Interface] Resume the NAND flash + * @mtd: MTD device structure + */ +static void nand_resume(struct mtd_info *mtd) +{ + struct nand_chip *chip = mtd->priv; + + if (chip->state == FL_PM_SUSPENDED) + nand_release_device(mtd); + else + pr_err("%s called for a chip which is not in suspended state\n", + __func__); +} +#endif + /* Set default functions */ static void nand_set_defaults(struct nand_chip *chip, int busw) { @@ -2526,7 +3115,15 @@ static void nand_set_defaults(struct nand_chip *chip, int busw) if (!chip->select_chip) chip->select_chip = nand_select_chip; - if (!chip->read_byte) + + /* set for ONFI nand */ + if (!chip->onfi_set_features) + chip->onfi_set_features = nand_onfi_set_features; + if (!chip->onfi_get_features) + chip->onfi_get_features = nand_onfi_get_features; + + /* If called twice, pointers that depend on busw may need to be reset */ + if (!chip->read_byte || chip->read_byte == nand_read_byte) chip->read_byte = busw ? nand_read_byte16 : nand_read_byte; if (!chip->read_word) chip->read_word = nand_read_word; @@ -2534,21 +3131,36 @@ static void nand_set_defaults(struct nand_chip *chip, int busw) chip->block_bad = nand_block_bad; if (!chip->block_markbad) chip->block_markbad = nand_default_block_markbad; - if (!chip->write_buf) + if (!chip->write_buf || chip->write_buf == nand_write_buf) chip->write_buf = busw ? nand_write_buf16 : nand_write_buf; - if (!chip->read_buf) + if (!chip->write_byte || chip->write_byte == nand_write_byte) + chip->write_byte = busw ? nand_write_byte16 : nand_write_byte; + if (!chip->read_buf || chip->read_buf == nand_read_buf) chip->read_buf = busw ? nand_read_buf16 : nand_read_buf; - if (!chip->verify_buf) - chip->verify_buf = busw ? nand_verify_buf16 : nand_verify_buf; if (!chip->scan_bbt) chip->scan_bbt = nand_default_bbt; - if (!chip->controller) +#ifdef __UBOOT__ +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) + if (!chip->verify_buf) + chip->verify_buf = busw ? nand_verify_buf16 : nand_verify_buf; +#endif +#endif + + if (!chip->controller) { chip->controller = &chip->hwcontrol; + spin_lock_init(&chip->controller->lock); + init_waitqueue_head(&chip->controller->wq); + } + } #ifdef CONFIG_SYS_NAND_ONFI_DETECTION /* Sanitize ONFI strings so we can safely print them */ +#ifndef __UBOOT__ +static void sanitize_string(uint8_t *s, size_t len) +#else static void sanitize_string(char *s, size_t len) +#endif { ssize_t i; @@ -2577,6 +3189,105 @@ static u16 onfi_crc16(u16 crc, u8 const *p, size_t len) return crc; } +/* Parse the Extended Parameter Page. */ +static int nand_flash_detect_ext_param_page(struct mtd_info *mtd, + struct nand_chip *chip, struct nand_onfi_params *p) +{ + struct onfi_ext_param_page *ep; + struct onfi_ext_section *s; + struct onfi_ext_ecc_info *ecc; + uint8_t *cursor; + int ret = -EINVAL; + int len; + int i; + + len = le16_to_cpu(p->ext_param_page_length) * 16; + ep = kmalloc(len, GFP_KERNEL); + if (!ep) + return -ENOMEM; + + /* Send our own NAND_CMD_PARAM. */ + chip->cmdfunc(mtd, NAND_CMD_PARAM, 0, -1); + + /* Use the Change Read Column command to skip the ONFI param pages. */ + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, + sizeof(*p) * p->num_of_param_pages , -1); + + /* Read out the Extended Parameter Page. */ + chip->read_buf(mtd, (uint8_t *)ep, len); + if ((onfi_crc16(ONFI_CRC_BASE, ((uint8_t *)ep) + 2, len - 2) + != le16_to_cpu(ep->crc))) { + pr_debug("fail in the CRC.\n"); + goto ext_out; + } + + /* + * Check the signature. + * Do not strictly follow the ONFI spec, maybe changed in future. + */ +#ifndef __UBOOT__ + if (strncmp(ep->sig, "EPPS", 4)) { +#else + if (strncmp((char *)ep->sig, "EPPS", 4)) { +#endif + pr_debug("The signature is invalid.\n"); + goto ext_out; + } + + /* find the ECC section. */ + cursor = (uint8_t *)(ep + 1); + for (i = 0; i < ONFI_EXT_SECTION_MAX; i++) { + s = ep->sections + i; + if (s->type == ONFI_SECTION_TYPE_2) + break; + cursor += s->length * 16; + } + if (i == ONFI_EXT_SECTION_MAX) { + pr_debug("We can not find the ECC section.\n"); + goto ext_out; + } + + /* get the info we want. */ + ecc = (struct onfi_ext_ecc_info *)cursor; + + if (!ecc->codeword_size) { + pr_debug("Invalid codeword size\n"); + goto ext_out; + } + + chip->ecc_strength_ds = ecc->ecc_bits; + chip->ecc_step_ds = 1 << ecc->codeword_size; + ret = 0; + +ext_out: + kfree(ep); + return ret; +} + +static int nand_setup_read_retry_micron(struct mtd_info *mtd, int retry_mode) +{ + struct nand_chip *chip = mtd->priv; + uint8_t feature[ONFI_SUBFEATURE_PARAM_LEN] = {retry_mode}; + + return chip->onfi_set_features(mtd, chip, ONFI_FEATURE_ADDR_READ_RETRY, + feature); +} + +/* + * Configure chip properties from Micron vendor-specific ONFI table + */ +static void nand_onfi_detect_micron(struct nand_chip *chip, + struct nand_onfi_params *p) +{ + struct nand_onfi_vendor_micron *micron = (void *)p->vendor; + + if (le16_to_cpu(p->vendor_revision) < 1) + return; + + chip->read_retries = micron->read_retry_options; + chip->setup_read_retry = nand_setup_read_retry_micron; +} + /* * Check if the NAND chip is ONFI compliant, returns 1 if it is, 0 otherwise. */ @@ -2593,19 +3304,29 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip, chip->read_byte(mtd) != 'F' || chip->read_byte(mtd) != 'I') return 0; + /* + * ONFI must be probed in 8-bit mode or with NAND_BUSWIDTH_AUTO, not + * with NAND_BUSWIDTH_16 + */ + if (chip->options & NAND_BUSWIDTH_16) { + pr_err("ONFI cannot be probed in 16-bit mode; aborting\n"); + return 0; + } + chip->cmdfunc(mtd, NAND_CMD_PARAM, 0, -1); for (i = 0; i < 3; i++) { for (j = 0; j < sizeof(*p); j++) ((uint8_t *)p)[j] = chip->read_byte(mtd); if (onfi_crc16(ONFI_CRC_BASE, (uint8_t *)p, 254) == le16_to_cpu(p->crc)) { - pr_info("ONFI param page %d valid\n", i); break; } } - if (i == 3) + if (i == 3) { + pr_err("Could not find valid ONFI parameter page; aborting\n"); return 0; + } /* Check version */ val = le16_to_cpu(p->revision); @@ -2619,11 +3340,9 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip, chip->onfi_version = 20; else if (val & (1 << 1)) chip->onfi_version = 10; - else - chip->onfi_version = 0; if (!chip->onfi_version) { - pr_info("%s: unsupported ONFI version: %d\n", __func__, val); + pr_info("unsupported ONFI version: %d\n", val); return 0; } @@ -2631,21 +3350,58 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip, sanitize_string(p->model, sizeof(p->model)); if (!mtd->name) mtd->name = p->model; + mtd->writesize = le32_to_cpu(p->byte_per_page); - mtd->erasesize = le32_to_cpu(p->pages_per_block) * mtd->writesize; + + /* + * pages_per_block and blocks_per_lun may not be a power-of-2 size + * (don't ask me who thought of this...). MTD assumes that these + * dimensions will be power-of-2, so just truncate the remaining area. + */ + mtd->erasesize = 1 << (fls(le32_to_cpu(p->pages_per_block)) - 1); + mtd->erasesize *= mtd->writesize; + mtd->oobsize = le16_to_cpu(p->spare_bytes_per_page); - chip->chipsize = le32_to_cpu(p->blocks_per_lun); + + /* See erasesize comment */ + chip->chipsize = 1 << (fls(le32_to_cpu(p->blocks_per_lun)) - 1); chip->chipsize *= (uint64_t)mtd->erasesize * p->lun_count; - *busw = 0; - if (le16_to_cpu(p->features) & 1) + chip->bits_per_cell = p->bits_per_cell; + + if (onfi_feature(chip) & ONFI_FEATURE_16_BIT_BUS) *busw = NAND_BUSWIDTH_16; + else + *busw = 0; + + if (p->ecc_bits != 0xff) { + chip->ecc_strength_ds = p->ecc_bits; + chip->ecc_step_ds = 512; + } else if (chip->onfi_version >= 21 && + (onfi_feature(chip) & ONFI_FEATURE_EXT_PARAM_PAGE)) { + + /* + * The nand_flash_detect_ext_param_page() uses the + * Change Read Column command which maybe not supported + * by the chip->cmdfunc. So try to update the chip->cmdfunc + * now. We do not replace user supplied command function. + */ + if (mtd->writesize > 512 && chip->cmdfunc == nand_command) + chip->cmdfunc = nand_command_lp; + + /* The Extended Parameter Page is supported since ONFI 2.1. */ + if (nand_flash_detect_ext_param_page(mtd, chip, p)) + pr_warn("Failed to detect ONFI extended param page\n"); + } else { + pr_warn("Could not retrieve ONFI ECC requirements\n"); + } + + if (p->jedec_id == NAND_MFR_MICRON) + nand_onfi_detect_micron(chip, p); - pr_info("ONFI flash detected\n"); return 1; } #else -static inline int nand_flash_detect_onfi(struct mtd_info *mtd, - struct nand_chip *chip, +static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip, int *busw) { return 0; @@ -2660,7 +3416,7 @@ static inline int nand_flash_detect_onfi(struct mtd_info *mtd, * * Check if an ID string is repeated within a given sequence of bytes at * specific repetition interval period (e.g., {0x20,0x01,0x7F,0x20} has a - * period of 2). This is a helper function for nand_id_len(). Returns non-zero + * period of 3). This is a helper function for nand_id_len(). Returns non-zero * if the repetition has a period of @period; otherwise, returns zero. */ static int nand_id_has_period(u8 *id_data, int arrlen, int period) @@ -2711,6 +3467,16 @@ static int nand_id_len(u8 *id_data, int arrlen) return arrlen; } +/* Extract the bits of per cell from the 3rd byte of the extended ID */ +static int nand_get_bits_per_cell(u8 cellinfo) +{ + int bits; + + bits = cellinfo & NAND_CI_CELLTYPE_MSK; + bits >>= NAND_CI_CELLTYPE_SHIFT; + return bits + 1; +} + /* * Many new NAND share similar device ID codes, which represent the size of the * chip. The rest of the parameters must be decoded according to generic or @@ -2721,7 +3487,7 @@ static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip, { int extid, id_len; /* The 3rd id byte holds MLC / multichip data */ - chip->cellinfo = id_data[2]; + chip->bits_per_cell = nand_get_bits_per_cell(id_data[2]); /* The 4th id byte is the important one */ extid = id_data[3]; @@ -2737,8 +3503,7 @@ static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip, * ID to decide what to do. */ if (id_len == 6 && id_data[0] == NAND_MFR_SAMSUNG && - (chip->cellinfo & NAND_CI_CELLTYPE_MSK) && - id_data[5] != 0x00) { + !nand_is_slc(chip) && id_data[5] != 0x00) { /* Calc pagesize */ mtd->writesize = 2048 << (extid & 0x03); extid >>= 2; @@ -2760,9 +3525,12 @@ static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip, mtd->oobsize = 512; break; case 6: - default: /* Other cases are "reserved" (unknown) */ mtd->oobsize = 640; break; + case 7: + default: /* Other cases are "reserved" (unknown) */ + mtd->oobsize = 1024; + break; } extid >>= 2; /* Calc blocksize */ @@ -2770,7 +3538,7 @@ static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip, (((extid >> 1) & 0x04) | (extid & 0x03)); *busw = 0; } else if (id_len == 6 && id_data[0] == NAND_MFR_HYNIX && - (chip->cellinfo & NAND_CI_CELLTYPE_MSK)) { + !nand_is_slc(chip)) { unsigned int tmp; /* Calc pagesize */ @@ -2823,16 +3591,32 @@ static void nand_decode_ext_id(struct mtd_info *mtd, struct nand_chip *chip, extid >>= 2; /* Get buswidth information */ *busw = (extid & 0x01) ? NAND_BUSWIDTH_16 : 0; + + /* + * Toshiba 24nm raw SLC (i.e., not BENAND) have 32B OOB per + * 512B page. For Toshiba SLC, we decode the 5th/6th byte as + * follows: + * - ID byte 6, bits[2:0]: 100b -> 43nm, 101b -> 32nm, + * 110b -> 24nm + * - ID byte 5, bit[7]: 1 -> BENAND, 0 -> raw SLC + */ + if (id_len >= 6 && id_data[0] == NAND_MFR_TOSHIBA && + nand_is_slc(chip) && + (id_data[5] & 0x7) == 0x6 /* 24nm */ && + !(id_data[4] & 0x80) /* !BENAND */) { + mtd->oobsize = 32 * mtd->writesize >> 9; + } + } } - /* +/* * Old devices have chip data hardcoded in the device ID table. nand_decode_id * decodes a matching ID table entry and assigns the MTD size parameters for * the chip. */ static void nand_decode_id(struct mtd_info *mtd, struct nand_chip *chip, - const struct nand_flash_dev *type, u8 id_data[8], + struct nand_flash_dev *type, u8 id_data[8], int *busw) { int maf_id = id_data[0]; @@ -2842,6 +3626,9 @@ static void nand_decode_id(struct mtd_info *mtd, struct nand_chip *chip, mtd->oobsize = mtd->writesize / 32; *busw = type->options & NAND_BUSWIDTH_16; + /* All legacy ID NAND are small-page, SLC */ + chip->bits_per_cell = 1; + /* * Check for Spansion/AMD ID + repeating 5th, 6th byte since * some Spansion chips have erasesize that conflicts with size @@ -2856,7 +3643,7 @@ static void nand_decode_id(struct mtd_info *mtd, struct nand_chip *chip, } } - /* +/* * Set the bad block marker/indicator (BBM/BBI) patterns according to some * heuristic patterns using various detected parameters (e.g., manufacturer, * page size, cell-type information). @@ -2878,11 +3665,11 @@ static void nand_decode_bbm_options(struct mtd_info *mtd, * Micron devices with 2KiB pages and on SLC Samsung, Hynix, Toshiba, * AMD/Spansion, and Macronix. All others scan only the first page. */ - if ((chip->cellinfo & NAND_CI_CELLTYPE_MSK) && + if (!nand_is_slc(chip) && (maf_id == NAND_MFR_SAMSUNG || maf_id == NAND_MFR_HYNIX)) chip->bbt_options |= NAND_BBT_SCANLASTPAGE; - else if ((!(chip->cellinfo & NAND_CI_CELLTYPE_MSK) && + else if ((nand_is_slc(chip) && (maf_id == NAND_MFR_SAMSUNG || maf_id == NAND_MFR_HYNIX || maf_id == NAND_MFR_TOSHIBA || @@ -2893,16 +3680,48 @@ static void nand_decode_bbm_options(struct mtd_info *mtd, chip->bbt_options |= NAND_BBT_SCAN2NDPAGE; } +static inline bool is_full_id_nand(struct nand_flash_dev *type) +{ + return type->id_len; +} + +static bool find_full_id_nand(struct mtd_info *mtd, struct nand_chip *chip, + struct nand_flash_dev *type, u8 *id_data, int *busw) +{ +#ifndef __UBOOT__ + if (!strncmp(type->id, id_data, type->id_len)) { +#else + if (!strncmp((char *)type->id, (char *)id_data, type->id_len)) { +#endif + mtd->writesize = type->pagesize; + mtd->erasesize = type->erasesize; + mtd->oobsize = type->oobsize; + + chip->bits_per_cell = nand_get_bits_per_cell(id_data[2]); + chip->chipsize = (uint64_t)type->chipsize << 20; + chip->options |= type->options; + chip->ecc_strength_ds = NAND_ECC_STRENGTH(type); + chip->ecc_step_ds = NAND_ECC_STEP(type); + + *busw = type->options & NAND_BUSWIDTH_16; + + if (!mtd->name) + mtd->name = type->name; + + return true; + } + return false; +} + /* * Get the flash and manufacturer id and lookup if the type is supported. */ -static const struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, +static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, struct nand_chip *chip, int busw, int *maf_id, int *dev_id, - const struct nand_flash_dev *type) + struct nand_flash_dev *type) { - const char *name; int i, maf_idx; u8 id_data[8]; @@ -2936,8 +3755,7 @@ static const struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, id_data[i] = chip->read_byte(mtd); if (id_data[0] != *maf_id || id_data[1] != *dev_id) { - pr_info("%s: second ID read did not match " - "%02x,%02x against %02x,%02x\n", __func__, + pr_info("second ID read did not match %02x,%02x against %02x,%02x\n", *maf_id, *dev_id, id_data[0], id_data[1]); return ERR_PTR(-ENODEV); } @@ -2945,9 +3763,14 @@ static const struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, if (!type) type = nand_flash_ids; - for (; type->name != NULL; type++) - if (*dev_id == type->id) - break; + for (; type->name != NULL; type++) { + if (is_full_id_nand(type)) { + if (find_full_id_nand(mtd, chip, type, id_data, &busw)) + goto ident_done; + } else if (*dev_id == type->dev_id) { + break; + } + } chip->onfi_version = 0; if (!type->name || !type->pagesize) { @@ -2973,7 +3796,7 @@ static const struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, } else { nand_decode_id(mtd, chip, type, id_data, &busw); } - /* Get chip options, preserve non chip based options */ + /* Get chip options */ chip->options |= type->options; /* @@ -2990,15 +3813,19 @@ ident_done: break; } - /* - * Check, if buswidth is correct. Hardware drivers should set - * chip correct! - */ - if (busw != (chip->options & NAND_BUSWIDTH_16)) { - pr_info("NAND device: Manufacturer ID:" - " 0x%02x, Chip ID: 0x%02x (%s %s)\n", *maf_id, - *dev_id, nand_manuf_ids[maf_idx].name, mtd->name); - pr_warn("NAND bus width %d instead %d bit\n", + if (chip->options & NAND_BUSWIDTH_AUTO) { + WARN_ON(chip->options & NAND_BUSWIDTH_16); + chip->options |= busw; + nand_set_defaults(chip, busw); + } else if (busw != (chip->options & NAND_BUSWIDTH_16)) { + /* + * Check, if buswidth is correct. Hardware drivers should set + * chip correct! + */ + pr_info("device found, Manufacturer ID: 0x%02x, Chip ID: 0x%02x\n", + *maf_id, *dev_id); + pr_info("%s %s\n", nand_manuf_ids[maf_idx].name, mtd->name); + pr_warn("bus width %d instead %d bit\n", (chip->options & NAND_BUSWIDTH_16) ? 16 : 8, busw ? 16 : 8); return ERR_PTR(-EINVAL); @@ -3021,28 +3848,23 @@ ident_done: } chip->badblockbits = 8; - - /* Check for AND chips with 4 page planes */ - if (chip->options & NAND_4PAGE_ARRAY) - chip->erase_cmd = multi_erase_cmd; - else - chip->erase_cmd = single_erase_cmd; + chip->erase_cmd = single_erase_cmd; /* Do not replace user supplied command function! */ if (mtd->writesize > 512 && chip->cmdfunc == nand_command) chip->cmdfunc = nand_command_lp; - name = type->name; + pr_info("device found, Manufacturer ID: 0x%02x, Chip ID: 0x%02x\n", + *maf_id, *dev_id); #ifdef CONFIG_SYS_NAND_ONFI_DETECTION - if (chip->onfi_version) - name = chip->onfi_params.model; + pr_info("%s %s\n", nand_manuf_ids[maf_idx].name, + chip->onfi_version ? chip->onfi_params.model : type->name); +#else + pr_info("%s %s\n", nand_manuf_ids[maf_idx].name, type->name); #endif - pr_info("NAND device: Manufacturer ID: 0x%02x, Chip ID: 0x%02x (%s %s)," - " page size: %d, OOB size: %d\n", - *maf_id, *dev_id, nand_manuf_ids[maf_idx].name, - name, + pr_info("%dMiB, %s, page size: %d, OOB size: %d\n", + (int)(chip->chipsize >> 20), nand_is_slc(chip) ? "SLC" : "MLC", mtd->writesize, mtd->oobsize); - return type; } @@ -3058,11 +3880,11 @@ ident_done: * The mtd->owner field must be set to the module of the caller. */ int nand_scan_ident(struct mtd_info *mtd, int maxchips, - const struct nand_flash_dev *table) + struct nand_flash_dev *table) { int i, busw, nand_maf_id, nand_dev_id; struct nand_chip *chip = mtd->priv; - const struct nand_flash_dev *type; + struct nand_flash_dev *type; /* Get buswidth to select the correct functions */ busw = chip->options & NAND_BUSWIDTH_16; @@ -3074,13 +3896,14 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, &nand_maf_id, &nand_dev_id, table); if (IS_ERR(type)) { -#ifndef CONFIG_SYS_NAND_QUIET_TEST - pr_warn("No NAND device found\n"); -#endif + if (!(chip->options & NAND_SCAN_SILENT_NODEV)) + pr_warn("No NAND device found\n"); chip->select_chip(mtd, -1); return PTR_ERR(type); } + chip->select_chip(mtd, -1); + /* Check for a chip array */ for (i = 1; i < maxchips; i++) { chip->select_chip(mtd, i); @@ -3090,12 +3913,16 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1); /* Read manufacturer and device IDs */ if (nand_maf_id != chip->read_byte(mtd) || - nand_dev_id != chip->read_byte(mtd)) + nand_dev_id != chip->read_byte(mtd)) { + chip->select_chip(mtd, -1); break; + } + chip->select_chip(mtd, -1); } + #ifdef DEBUG if (i > 1) - pr_info("%d NAND chips detected\n", i); + pr_info("%d chips detected\n", i); #endif /* Store the number of chips and calc total size for mtd */ @@ -3104,6 +3931,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, return 0; } +EXPORT_SYMBOL(nand_scan_ident); /** @@ -3118,14 +3946,14 @@ int nand_scan_tail(struct mtd_info *mtd) { int i; struct nand_chip *chip = mtd->priv; + struct nand_ecc_ctrl *ecc = &chip->ecc; /* New bad blocks should be marked in OOB, flash-based BBT, or both */ BUG_ON((chip->bbt_options & NAND_BBT_NO_OOB_BBM) && !(chip->bbt_options & NAND_BBT_USE_FLASH)); if (!(chip->options & NAND_OWN_BUFFERS)) - chip->buffers = memalign(ARCH_DMA_MINALIGN, - sizeof(*chip->buffers)); + chip->buffers = kmalloc(sizeof(*chip->buffers), GFP_KERNEL); if (!chip->buffers) return -ENOMEM; @@ -3135,94 +3963,91 @@ int nand_scan_tail(struct mtd_info *mtd) /* * If no default placement scheme is given, select an appropriate one. */ - if (!chip->ecc.layout && (chip->ecc.mode != NAND_ECC_SOFT_BCH)) { + if (!ecc->layout && (ecc->mode != NAND_ECC_SOFT_BCH)) { switch (mtd->oobsize) { case 8: - chip->ecc.layout = &nand_oob_8; + ecc->layout = &nand_oob_8; break; case 16: - chip->ecc.layout = &nand_oob_16; + ecc->layout = &nand_oob_16; break; case 64: - chip->ecc.layout = &nand_oob_64; + ecc->layout = &nand_oob_64; break; case 128: - chip->ecc.layout = &nand_oob_128; + ecc->layout = &nand_oob_128; break; default: pr_warn("No oob scheme defined for oobsize %d\n", mtd->oobsize); + BUG(); } } if (!chip->write_page) chip->write_page = nand_write_page; - /* set for ONFI nand */ - if (!chip->onfi_set_features) - chip->onfi_set_features = nand_onfi_set_features; - if (!chip->onfi_get_features) - chip->onfi_get_features = nand_onfi_get_features; - /* * Check ECC mode, default to software if 3byte/512byte hardware ECC is * selected and we have 256 byte pagesize fallback to software ECC */ - switch (chip->ecc.mode) { + switch (ecc->mode) { case NAND_ECC_HW_OOB_FIRST: /* Similar to NAND_ECC_HW, but a separate read_page handle */ - if (!chip->ecc.calculate || !chip->ecc.correct || - !chip->ecc.hwctl) { + if (!ecc->calculate || !ecc->correct || !ecc->hwctl) { pr_warn("No ECC functions supplied; " "hardware ECC not possible\n"); BUG(); } - if (!chip->ecc.read_page) - chip->ecc.read_page = nand_read_page_hwecc_oob_first; + if (!ecc->read_page) + ecc->read_page = nand_read_page_hwecc_oob_first; case NAND_ECC_HW: /* Use standard hwecc read page function? */ - if (!chip->ecc.read_page) - chip->ecc.read_page = nand_read_page_hwecc; - if (!chip->ecc.write_page) - chip->ecc.write_page = nand_write_page_hwecc; - if (!chip->ecc.read_page_raw) - chip->ecc.read_page_raw = nand_read_page_raw; - if (!chip->ecc.write_page_raw) - chip->ecc.write_page_raw = nand_write_page_raw; - if (!chip->ecc.read_oob) - chip->ecc.read_oob = nand_read_oob_std; - if (!chip->ecc.write_oob) - chip->ecc.write_oob = nand_write_oob_std; + if (!ecc->read_page) + ecc->read_page = nand_read_page_hwecc; + if (!ecc->write_page) + ecc->write_page = nand_write_page_hwecc; + if (!ecc->read_page_raw) + ecc->read_page_raw = nand_read_page_raw; + if (!ecc->write_page_raw) + ecc->write_page_raw = nand_write_page_raw; + if (!ecc->read_oob) + ecc->read_oob = nand_read_oob_std; + if (!ecc->write_oob) + ecc->write_oob = nand_write_oob_std; + if (!ecc->read_subpage) + ecc->read_subpage = nand_read_subpage; + if (!ecc->write_subpage) + ecc->write_subpage = nand_write_subpage_hwecc; case NAND_ECC_HW_SYNDROME: - if ((!chip->ecc.calculate || !chip->ecc.correct || - !chip->ecc.hwctl) && - (!chip->ecc.read_page || - chip->ecc.read_page == nand_read_page_hwecc || - !chip->ecc.write_page || - chip->ecc.write_page == nand_write_page_hwecc)) { + if ((!ecc->calculate || !ecc->correct || !ecc->hwctl) && + (!ecc->read_page || + ecc->read_page == nand_read_page_hwecc || + !ecc->write_page || + ecc->write_page == nand_write_page_hwecc)) { pr_warn("No ECC functions supplied; " "hardware ECC not possible\n"); BUG(); } /* Use standard syndrome read/write page function? */ - if (!chip->ecc.read_page) - chip->ecc.read_page = nand_read_page_syndrome; - if (!chip->ecc.write_page) - chip->ecc.write_page = nand_write_page_syndrome; - if (!chip->ecc.read_page_raw) - chip->ecc.read_page_raw = nand_read_page_raw_syndrome; - if (!chip->ecc.write_page_raw) - chip->ecc.write_page_raw = nand_write_page_raw_syndrome; - if (!chip->ecc.read_oob) - chip->ecc.read_oob = nand_read_oob_syndrome; - if (!chip->ecc.write_oob) - chip->ecc.write_oob = nand_write_oob_syndrome; - - if (mtd->writesize >= chip->ecc.size) { - if (!chip->ecc.strength) { + if (!ecc->read_page) + ecc->read_page = nand_read_page_syndrome; + if (!ecc->write_page) + ecc->write_page = nand_write_page_syndrome; + if (!ecc->read_page_raw) + ecc->read_page_raw = nand_read_page_raw_syndrome; + if (!ecc->write_page_raw) + ecc->write_page_raw = nand_write_page_raw_syndrome; + if (!ecc->read_oob) + ecc->read_oob = nand_read_oob_syndrome; + if (!ecc->write_oob) + ecc->write_oob = nand_write_oob_syndrome; + + if (mtd->writesize >= ecc->size) { + if (!ecc->strength) { pr_warn("Driver must set ecc.strength when using hardware ECC\n"); BUG(); } @@ -3230,109 +4055,107 @@ int nand_scan_tail(struct mtd_info *mtd) } pr_warn("%d byte HW ECC not possible on " "%d byte page size, fallback to SW ECC\n", - chip->ecc.size, mtd->writesize); - chip->ecc.mode = NAND_ECC_SOFT; + ecc->size, mtd->writesize); + ecc->mode = NAND_ECC_SOFT; case NAND_ECC_SOFT: - chip->ecc.calculate = nand_calculate_ecc; - chip->ecc.correct = nand_correct_data; - chip->ecc.read_page = nand_read_page_swecc; - chip->ecc.read_subpage = nand_read_subpage; - chip->ecc.write_page = nand_write_page_swecc; - chip->ecc.read_page_raw = nand_read_page_raw; - chip->ecc.write_page_raw = nand_write_page_raw; - chip->ecc.read_oob = nand_read_oob_std; - chip->ecc.write_oob = nand_write_oob_std; - if (!chip->ecc.size) - chip->ecc.size = 256; - chip->ecc.bytes = 3; - chip->ecc.strength = 1; + ecc->calculate = nand_calculate_ecc; + ecc->correct = nand_correct_data; + ecc->read_page = nand_read_page_swecc; + ecc->read_subpage = nand_read_subpage; + ecc->write_page = nand_write_page_swecc; + ecc->read_page_raw = nand_read_page_raw; + ecc->write_page_raw = nand_write_page_raw; + ecc->read_oob = nand_read_oob_std; + ecc->write_oob = nand_write_oob_std; + if (!ecc->size) + ecc->size = 256; + ecc->bytes = 3; + ecc->strength = 1; break; case NAND_ECC_SOFT_BCH: if (!mtd_nand_has_bch()) { pr_warn("CONFIG_MTD_ECC_BCH not enabled\n"); - return -EINVAL; + BUG(); } - chip->ecc.calculate = nand_bch_calculate_ecc; - chip->ecc.correct = nand_bch_correct_data; - chip->ecc.read_page = nand_read_page_swecc; - chip->ecc.read_subpage = nand_read_subpage; - chip->ecc.write_page = nand_write_page_swecc; - chip->ecc.read_page_raw = nand_read_page_raw; - chip->ecc.write_page_raw = nand_write_page_raw; - chip->ecc.read_oob = nand_read_oob_std; - chip->ecc.write_oob = nand_write_oob_std; + ecc->calculate = nand_bch_calculate_ecc; + ecc->correct = nand_bch_correct_data; + ecc->read_page = nand_read_page_swecc; + ecc->read_subpage = nand_read_subpage; + ecc->write_page = nand_write_page_swecc; + ecc->read_page_raw = nand_read_page_raw; + ecc->write_page_raw = nand_write_page_raw; + ecc->read_oob = nand_read_oob_std; + ecc->write_oob = nand_write_oob_std; /* * Board driver should supply ecc.size and ecc.bytes values to * select how many bits are correctable; see nand_bch_init() * for details. Otherwise, default to 4 bits for large page * devices. */ - if (!chip->ecc.size && (mtd->oobsize >= 64)) { - chip->ecc.size = 512; - chip->ecc.bytes = 7; + if (!ecc->size && (mtd->oobsize >= 64)) { + ecc->size = 512; + ecc->bytes = 7; } - chip->ecc.priv = nand_bch_init(mtd, - chip->ecc.size, - chip->ecc.bytes, - &chip->ecc.layout); - if (!chip->ecc.priv) + ecc->priv = nand_bch_init(mtd, ecc->size, ecc->bytes, + &ecc->layout); + if (!ecc->priv) { pr_warn("BCH ECC initialization failed!\n"); - chip->ecc.strength = - chip->ecc.bytes * 8 / fls(8 * chip->ecc.size); + BUG(); + } + ecc->strength = ecc->bytes * 8 / fls(8 * ecc->size); break; case NAND_ECC_NONE: pr_warn("NAND_ECC_NONE selected by board driver. " - "This is not recommended !!\n"); - chip->ecc.read_page = nand_read_page_raw; - chip->ecc.write_page = nand_write_page_raw; - chip->ecc.read_oob = nand_read_oob_std; - chip->ecc.read_page_raw = nand_read_page_raw; - chip->ecc.write_page_raw = nand_write_page_raw; - chip->ecc.write_oob = nand_write_oob_std; - chip->ecc.size = mtd->writesize; - chip->ecc.bytes = 0; + "This is not recommended!\n"); + ecc->read_page = nand_read_page_raw; + ecc->write_page = nand_write_page_raw; + ecc->read_oob = nand_read_oob_std; + ecc->read_page_raw = nand_read_page_raw; + ecc->write_page_raw = nand_write_page_raw; + ecc->write_oob = nand_write_oob_std; + ecc->size = mtd->writesize; + ecc->bytes = 0; + ecc->strength = 0; break; default: - pr_warn("Invalid NAND_ECC_MODE %d\n", chip->ecc.mode); + pr_warn("Invalid NAND_ECC_MODE %d\n", ecc->mode); BUG(); } /* For many systems, the standard OOB write also works for raw */ - if (!chip->ecc.read_oob_raw) - chip->ecc.read_oob_raw = chip->ecc.read_oob; - if (!chip->ecc.write_oob_raw) - chip->ecc.write_oob_raw = chip->ecc.write_oob; + if (!ecc->read_oob_raw) + ecc->read_oob_raw = ecc->read_oob; + if (!ecc->write_oob_raw) + ecc->write_oob_raw = ecc->write_oob; /* * The number of bytes available for a client to place data into * the out of band area. */ - chip->ecc.layout->oobavail = 0; - for (i = 0; chip->ecc.layout->oobfree[i].length - && i < ARRAY_SIZE(chip->ecc.layout->oobfree); i++) - chip->ecc.layout->oobavail += - chip->ecc.layout->oobfree[i].length; - mtd->oobavail = chip->ecc.layout->oobavail; + ecc->layout->oobavail = 0; + for (i = 0; ecc->layout->oobfree[i].length + && i < ARRAY_SIZE(ecc->layout->oobfree); i++) + ecc->layout->oobavail += ecc->layout->oobfree[i].length; + mtd->oobavail = ecc->layout->oobavail; /* * Set the number of read / write steps for one page depending on ECC * mode. */ - chip->ecc.steps = mtd->writesize / chip->ecc.size; - if (chip->ecc.steps * chip->ecc.size != mtd->writesize) { + ecc->steps = mtd->writesize / ecc->size; + if (ecc->steps * ecc->size != mtd->writesize) { pr_warn("Invalid ECC parameters\n"); BUG(); } - chip->ecc.total = chip->ecc.steps * chip->ecc.bytes; + ecc->total = ecc->steps * ecc->bytes; /* Allow subpage writes up to ecc.steps. Not possible for MLC flash */ - if (!(chip->options & NAND_NO_SUBPAGE_WRITE) && - !(chip->cellinfo & NAND_CI_CELLTYPE_MSK)) { - switch (chip->ecc.steps) { + if (!(chip->options & NAND_NO_SUBPAGE_WRITE) && nand_is_slc(chip)) { + switch (ecc->steps) { case 2: mtd->subpage_sft = 1; break; @@ -3348,36 +4171,42 @@ int nand_scan_tail(struct mtd_info *mtd) /* Initialize state */ chip->state = FL_READY; - /* De-select the device */ - chip->select_chip(mtd, -1); - /* Invalidate the pagebuffer reference */ chip->pagebuf = -1; /* Large page NAND with SOFT_ECC should support subpage reads */ - if ((chip->ecc.mode == NAND_ECC_SOFT) && (chip->page_shift > 9)) + if ((ecc->mode == NAND_ECC_SOFT) && (chip->page_shift > 9)) chip->options |= NAND_SUBPAGE_READ; /* Fill in remaining MTD driver data */ - mtd->type = MTD_NANDFLASH; + mtd->type = nand_is_slc(chip) ? MTD_NANDFLASH : MTD_MLCNANDFLASH; mtd->flags = (chip->options & NAND_ROM) ? MTD_CAP_ROM : MTD_CAP_NANDFLASH; mtd->_erase = nand_erase; +#ifndef __UBOOT__ mtd->_point = NULL; mtd->_unpoint = NULL; +#endif mtd->_read = nand_read; mtd->_write = nand_write; + mtd->_panic_write = panic_nand_write; mtd->_read_oob = nand_read_oob; mtd->_write_oob = nand_write_oob; mtd->_sync = nand_sync; mtd->_lock = NULL; mtd->_unlock = NULL; +#ifndef __UBOOT__ + mtd->_suspend = nand_suspend; + mtd->_resume = nand_resume; +#endif mtd->_block_isbad = nand_block_isbad; mtd->_block_markbad = nand_block_markbad; + mtd->writebufsize = mtd->writesize; /* propagate ecc info to mtd_info */ - mtd->ecclayout = chip->ecc.layout; - mtd->ecc_strength = chip->ecc.strength; + mtd->ecclayout = ecc->layout; + mtd->ecc_strength = ecc->strength; + mtd->ecc_step_size = ecc->size; /* * Initialize bitflip_threshold to its default prior scan_bbt() call. * scan_bbt() might invoke mtd_read(), thus bitflip_threshold must be @@ -3388,10 +4217,24 @@ int nand_scan_tail(struct mtd_info *mtd) /* Check, if we should skip the bad block table scan */ if (chip->options & NAND_SKIP_BBTSCAN) - chip->options |= NAND_BBT_SCANNED; + return 0; - return 0; + /* Build bad block table */ + return chip->scan_bbt(mtd); } +EXPORT_SYMBOL(nand_scan_tail); + +/* + * is_module_text_address() isn't exported, and it's mostly a pointless + * test if this is a module _anyway_ -- they'd have to try _really_ hard + * to call us from in-kernel code if the core NAND support is modular. + */ +#ifdef MODULE +#define caller_is_module() (1) +#else +#define caller_is_module() \ + is_module_text_address((unsigned long)__builtin_return_address(0)) +#endif /** * nand_scan - [NAND Interface] Scan for the NAND device @@ -3407,12 +4250,20 @@ int nand_scan(struct mtd_info *mtd, int maxchips) { int ret; + /* Many callers got this wrong, so check for it for a while... */ + if (!mtd->owner && caller_is_module()) { + pr_crit("%s called with NULL mtd->owner!\n", __func__); + BUG(); + } + ret = nand_scan_ident(mtd, maxchips, NULL); if (!ret) ret = nand_scan_tail(mtd); return ret; } +EXPORT_SYMBOL(nand_scan); +#ifndef __UBOOT__ /** * nand_release - [NAND Interface] Free resources held by the NAND device * @mtd: MTD device structure @@ -3424,10 +4275,7 @@ void nand_release(struct mtd_info *mtd) if (chip->ecc.mode == NAND_ECC_SOFT_BCH) nand_bch_free((struct nand_bch_control *)chip->ecc.priv); -#ifdef CONFIG_MTD_PARTITIONS - /* Deregister partitions */ - del_mtd_partitions(mtd); -#endif + mtd_device_unregister(mtd); /* Free bad block table memory */ kfree(chip->bbt); @@ -3439,3 +4287,24 @@ void nand_release(struct mtd_info *mtd) & NAND_BBT_DYNAMICSTRUCT) kfree(chip->badblock_pattern); } +EXPORT_SYMBOL_GPL(nand_release); + +static int __init nand_base_init(void) +{ + led_trigger_register_simple("nand-disk", &nand_led_trigger); + return 0; +} + +static void __exit nand_base_exit(void) +{ + led_trigger_unregister_simple(nand_led_trigger); +} +#endif + +module_init(nand_base_init); +module_exit(nand_base_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Steven J. Hill <sjhill@realitydiluted.com>"); +MODULE_AUTHOR("Thomas Gleixner <tglx@linutronix.de>"); +MODULE_DESCRIPTION("Generic NAND flash driver code"); diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c index 8ef5845..c8f28c7 100644 --- a/drivers/mtd/nand/nand_bbt.c +++ b/drivers/mtd/nand/nand_bbt.c @@ -59,17 +59,55 @@ * */ -#include <common.h> -#include <malloc.h> -#include <linux/compat.h> +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/slab.h> +#include <linux/types.h> #include <linux/mtd/mtd.h> #include <linux/mtd/bbm.h> #include <linux/mtd/nand.h> #include <linux/mtd/nand_ecc.h> #include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/vmalloc.h> +#include <linux/export.h> #include <linux/string.h> +#else +#include <common.h> +#include <malloc.h> +#include <linux/compat.h> + + #include <linux/mtd/mtd.h> + #include <linux/mtd/bbm.h> + #include <linux/mtd/nand.h> + #include <linux/mtd/nand_ecc.h> + #include <linux/bitops.h> + #include <linux/string.h> +#endif + +#define BBT_BLOCK_GOOD 0x00 +#define BBT_BLOCK_WORN 0x01 +#define BBT_BLOCK_RESERVED 0x02 +#define BBT_BLOCK_FACTORY_BAD 0x03 -#include <asm/errno.h> +#define BBT_ENTRY_MASK 0x03 +#define BBT_ENTRY_SHIFT 2 + +static int nand_update_bbt(struct mtd_info *mtd, loff_t offs); + +static inline uint8_t bbt_get_entry(struct nand_chip *chip, int block) +{ + uint8_t entry = chip->bbt[block >> BBT_ENTRY_SHIFT]; + entry >>= (block & BBT_ENTRY_MASK) * 2; + return entry & BBT_ENTRY_MASK; +} + +static inline void bbt_mark_entry(struct nand_chip *chip, int block, + uint8_t mark) +{ + uint8_t msk = (mark & BBT_ENTRY_MASK) << ((block & BBT_ENTRY_MASK) * 2); + chip->bbt[block >> BBT_ENTRY_SHIFT] |= msk; +} static int check_pattern_no_oob(uint8_t *buf, struct nand_bbt_descr *td) { @@ -86,33 +124,17 @@ static int check_pattern_no_oob(uint8_t *buf, struct nand_bbt_descr *td) * @td: search pattern descriptor * * Check for a pattern at the given place. Used to search bad block tables and - * good / bad block identifiers. If the SCAN_EMPTY option is set then check, if - * all bytes except the pattern area contain 0xff. + * good / bad block identifiers. */ static int check_pattern(uint8_t *buf, int len, int paglen, struct nand_bbt_descr *td) { - int end = 0; - uint8_t *p = buf; - if (td->options & NAND_BBT_NO_OOB) return check_pattern_no_oob(buf, td); - end = paglen + td->offs; - if (td->options & NAND_BBT_SCANEMPTY) - if (memchr_inv(p, 0xff, end)) - return -1; - p += end; - /* Compare the pattern */ - if (memcmp(p, td->pattern, td->len)) + if (memcmp(buf + paglen + td->offs, td->pattern, td->len)) return -1; - if (td->options & NAND_BBT_SCANEMPTY) { - p += td->len; - end += td->len; - if (memchr_inv(p, 0xff, len - end)) - return -1; - } return 0; } @@ -159,7 +181,7 @@ static u32 add_marker_len(struct nand_bbt_descr *td) * @page: the starting page * @num: the number of bbt descriptors to read * @td: the bbt describtion table - * @offs: offset in the memory table + * @offs: block number offset in the table * * Read the bad block table starting from page. */ @@ -209,25 +231,33 @@ static int read_bbt(struct mtd_info *mtd, uint8_t *buf, int page, int num, /* Analyse data */ for (i = 0; i < len; i++) { uint8_t dat = buf[i]; - for (j = 0; j < 8; j += bits, act += 2) { + for (j = 0; j < 8; j += bits, act++) { uint8_t tmp = (dat >> j) & msk; if (tmp == msk) continue; if (reserved_block_code && (tmp == reserved_block_code)) { pr_info("nand_read_bbt: reserved block at 0x%012llx\n", - (loff_t)((offs << 2) + (act >> 1)) << this->bbt_erase_shift); - this->bbt[offs + (act >> 3)] |= 0x2 << (act & 0x06); + (loff_t)(offs + act) << + this->bbt_erase_shift); + bbt_mark_entry(this, offs + act, + BBT_BLOCK_RESERVED); mtd->ecc_stats.bbtblocks++; continue; } - pr_info("nand_read_bbt: Bad block at 0x%012llx\n", - (loff_t)((offs << 2) + (act >> 1)) - << this->bbt_erase_shift); + /* + * Leave it for now, if it's matured we can + * move this message to pr_debug. + */ + pr_info("nand_read_bbt: bad block at 0x%012llx\n", + (loff_t)(offs + act) << + this->bbt_erase_shift); /* Factory marked bad or worn out? */ if (tmp == 0) - this->bbt[offs + (act >> 3)] |= 0x3 << (act & 0x06); + bbt_mark_entry(this, offs + act, + BBT_BLOCK_FACTORY_BAD); else - this->bbt[offs + (act >> 3)] |= 0x1 << (act & 0x06); + bbt_mark_entry(this, offs + act, + BBT_BLOCK_WORN); mtd->ecc_stats.badblocks++; } } @@ -262,7 +292,7 @@ static int read_abs_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_desc td, offs); if (res) return res; - offs += this->chipsize >> (this->bbt_erase_shift + 2); + offs += this->chipsize >> this->bbt_erase_shift; } } else { res = read_bbt(mtd, buf, td->pages[0], @@ -396,25 +426,6 @@ static void read_abs_bbts(struct mtd_info *mtd, uint8_t *buf, } } -/* Scan a given block full */ -static int scan_block_full(struct mtd_info *mtd, struct nand_bbt_descr *bd, - loff_t offs, uint8_t *buf, size_t readlen, - int scanlen, int numpages) -{ - int ret, j; - - ret = scan_read_oob(mtd, buf, offs, readlen); - /* Ignore ECC errors when checking for BBM */ - if (ret && !mtd_is_bitflip_or_eccerr(ret)) - return ret; - - for (j = 0; j < numpages; j++, buf += scanlen) { - if (check_pattern(buf, scanlen, mtd->writesize, bd)) - return 1; - } - return 0; -} - /* Scan a given block partially */ static int scan_block_fast(struct mtd_info *mtd, struct nand_bbt_descr *bd, loff_t offs, uint8_t *buf, int numpages) @@ -461,36 +472,19 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr *bd, int chip) { struct nand_chip *this = mtd->priv; - int i, numblocks, numpages, scanlen; + int i, numblocks, numpages; int startblock; loff_t from; - size_t readlen; pr_info("Scanning device for bad blocks\n"); - if (bd->options & NAND_BBT_SCANALLPAGES) - numpages = 1 << (this->bbt_erase_shift - this->page_shift); - else if (bd->options & NAND_BBT_SCAN2NDPAGE) + if (bd->options & NAND_BBT_SCAN2NDPAGE) numpages = 2; else numpages = 1; - if (!(bd->options & NAND_BBT_SCANEMPTY)) { - /* We need only read few bytes from the OOB area */ - scanlen = 0; - readlen = bd->len; - } else { - /* Full page content should be read */ - scanlen = mtd->writesize + mtd->oobsize; - readlen = numpages * mtd->writesize; - } - if (chip == -1) { - /* - * Note that numblocks is 2 * (real numblocks) here, see i+=2 - * below as it makes shifting and masking less painful - */ - numblocks = mtd->size >> (this->bbt_erase_shift - 1); + numblocks = mtd->size >> this->bbt_erase_shift; startblock = 0; from = 0; } else { @@ -499,37 +493,31 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, chip + 1, this->numchips); return -EINVAL; } - numblocks = this->chipsize >> (this->bbt_erase_shift - 1); + numblocks = this->chipsize >> this->bbt_erase_shift; startblock = chip * numblocks; numblocks += startblock; - from = (loff_t)startblock << (this->bbt_erase_shift - 1); + from = (loff_t)startblock << this->bbt_erase_shift; } if (this->bbt_options & NAND_BBT_SCANLASTPAGE) from += mtd->erasesize - (mtd->writesize * numpages); - for (i = startblock; i < numblocks;) { + for (i = startblock; i < numblocks; i++) { int ret; BUG_ON(bd->options & NAND_BBT_NO_OOB); - if (bd->options & NAND_BBT_SCANALLPAGES) - ret = scan_block_full(mtd, bd, from, buf, readlen, - scanlen, numpages); - else - ret = scan_block_fast(mtd, bd, from, buf, numpages); - + ret = scan_block_fast(mtd, bd, from, buf, numpages); if (ret < 0) return ret; if (ret) { - this->bbt[i >> 3] |= 0x03 << (i & 0x6); + bbt_mark_entry(this, i, BBT_BLOCK_FACTORY_BAD); pr_warn("Bad eraseblock %d at 0x%012llx\n", - i >> 1, (unsigned long long)from); + i, (unsigned long long)from); mtd->ecc_stats.badblocks++; } - i += 2; from += (1 << this->bbt_erase_shift); } return 0; @@ -554,7 +542,11 @@ static int search_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr { struct nand_chip *this = mtd->priv; int i, chips; +#ifndef __UBOOT__ + int bits, startblock, block, dir; +#else int startblock, block, dir; +#endif int scanlen = mtd->writesize + mtd->oobsize; int bbtblocks; int blocktopage = this->bbt_erase_shift - this->page_shift; @@ -578,6 +570,11 @@ static int search_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr bbtblocks = mtd->size >> this->bbt_erase_shift; } +#ifndef __UBOOT__ + /* Number of bits for each erase block in the bbt */ + bits = td->options & NAND_BBT_NRBITS_MSK; +#endif + for (i = 0; i < chips; i++) { /* Reset version information */ td->version[i] = 0; @@ -606,8 +603,8 @@ static int search_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr if (td->pages[i] == -1) pr_warn("Bad block table not found for chip %d\n", i); else - pr_info("Bad block table found at page %d, version 0x%02X\n", td->pages[i], - td->version[i]); + pr_info("Bad block table found at page %d, version " + "0x%02X\n", td->pages[i], td->version[i]); } return 0; } @@ -649,9 +646,9 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf, { struct nand_chip *this = mtd->priv; struct erase_info einfo; - int i, j, res, chip = 0; + int i, res, chip = 0; int bits, startblock, dir, page, offs, numblocks, sft, sftmsk; - int nrchips, bbtoffs, pageoffs, ooboffs; + int nrchips, pageoffs, ooboffs; uint8_t msk[4]; uint8_t rcode = td->reserved_block_code; size_t retlen, len = 0; @@ -707,10 +704,9 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf, for (i = 0; i < td->maxblocks; i++) { int block = startblock + dir * i; /* Check, if the block is bad */ - switch ((this->bbt[block >> 2] >> - (2 * (block & 0x03))) & 0x03) { - case 0x01: - case 0x03: + switch (bbt_get_entry(this, block)) { + case BBT_BLOCK_WORN: + case BBT_BLOCK_FACTORY_BAD: continue; } page = block << @@ -742,8 +738,6 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf, default: return -EINVAL; } - bbtoffs = chip * (numblocks >> 2); - to = ((loff_t)page) << this->page_shift; /* Must we save the block contents? */ @@ -808,16 +802,12 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf, buf[ooboffs + td->veroffs] = td->version[chip]; /* Walk through the memory table */ - for (i = 0; i < numblocks;) { + for (i = 0; i < numblocks; i++) { uint8_t dat; - dat = this->bbt[bbtoffs + (i >> 2)]; - for (j = 0; j < 4; j++, i++) { - int sftcnt = (i << (3 - sft)) & sftmsk; - /* Do not store the reserved bbt blocks! */ - buf[offs + (i >> sft)] &= - ~(msk[dat & 0x03] << sftcnt); - dat >>= 2; - } + int sftcnt = (i << (3 - sft)) & sftmsk; + dat = bbt_get_entry(this, chip * numblocks + i); + /* Do not store the reserved bbt blocks! */ + buf[offs + (i >> sft)] &= ~(msk[dat] << sftcnt); } memset(&einfo, 0, sizeof(einfo)); @@ -859,7 +849,6 @@ static inline int nand_memory_bbt(struct mtd_info *mtd, struct nand_bbt_descr *b { struct nand_chip *this = mtd->priv; - bd->options &= ~NAND_BBT_SCANEMPTY; return create_bbt(mtd, this->buffers->databuf, bd, -1); } @@ -1003,7 +992,7 @@ static void mark_bbt_region(struct mtd_info *mtd, struct nand_bbt_descr *td) { struct nand_chip *this = mtd->priv; int i, j, chips, block, nrblocks, update; - uint8_t oldval, newval; + uint8_t oldval; /* Do we have a bbt per chip? */ if (td->options & NAND_BBT_PERCHIP) { @@ -1020,12 +1009,12 @@ static void mark_bbt_region(struct mtd_info *mtd, struct nand_bbt_descr *td) if (td->pages[i] == -1) continue; block = td->pages[i] >> (this->bbt_erase_shift - this->page_shift); - block <<= 1; - oldval = this->bbt[(block >> 3)]; - newval = oldval | (0x2 << (block & 0x06)); - this->bbt[(block >> 3)] = newval; - if ((oldval != newval) && td->reserved_block_code) - nand_update_bbt(mtd, (loff_t)block << (this->bbt_erase_shift - 1)); + oldval = bbt_get_entry(this, block); + bbt_mark_entry(this, block, BBT_BLOCK_RESERVED); + if ((oldval != BBT_BLOCK_RESERVED) && + td->reserved_block_code) + nand_update_bbt(mtd, (loff_t)block << + this->bbt_erase_shift); continue; } update = 0; @@ -1033,14 +1022,12 @@ static void mark_bbt_region(struct mtd_info *mtd, struct nand_bbt_descr *td) block = ((i + 1) * nrblocks) - td->maxblocks; else block = i * nrblocks; - block <<= 1; for (j = 0; j < td->maxblocks; j++) { - oldval = this->bbt[(block >> 3)]; - newval = oldval | (0x2 << (block & 0x06)); - this->bbt[(block >> 3)] = newval; - if (oldval != newval) + oldval = bbt_get_entry(this, block); + bbt_mark_entry(this, block, BBT_BLOCK_RESERVED); + if (oldval != BBT_BLOCK_RESERVED) update = 1; - block += 2; + block++; } /* * If we want reserved blocks to be recorded to flash, and some @@ -1048,7 +1035,8 @@ static void mark_bbt_region(struct mtd_info *mtd, struct nand_bbt_descr *td) * bbts. This should only happen once. */ if (update && td->reserved_block_code) - nand_update_bbt(mtd, (loff_t)(block - 2) << (this->bbt_erase_shift - 1)); + nand_update_bbt(mtd, (loff_t)(block - 1) << + this->bbt_erase_shift); } } @@ -1174,13 +1162,13 @@ int nand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd) } /** - * nand_update_bbt - [NAND Interface] update bad block table(s) + * nand_update_bbt - update bad block table(s) * @mtd: MTD device structure * @offs: the offset of the newly marked block * * The function updates the bad block table(s). */ -int nand_update_bbt(struct mtd_info *mtd, loff_t offs) +static int nand_update_bbt(struct mtd_info *mtd, loff_t offs) { struct nand_chip *this = mtd->priv; int len, res = 0; @@ -1234,15 +1222,6 @@ int nand_update_bbt(struct mtd_info *mtd, loff_t offs) */ static uint8_t scan_ff_pattern[] = { 0xff, 0xff }; -static uint8_t scan_agand_pattern[] = { 0x1C, 0x71, 0xC7, 0x1C, 0x71, 0xC7 }; - -static struct nand_bbt_descr agand_flashbased = { - .options = NAND_BBT_SCANEMPTY | NAND_BBT_SCANALLPAGES, - .offs = 0x20, - .len = 6, - .pattern = scan_agand_pattern -}; - /* Generic flash bbt descriptors */ static uint8_t bbt_pattern[] = {'B', 'b', 't', '0' }; static uint8_t mirror_pattern[] = {'1', 't', 'b', 'B' }; @@ -1327,22 +1306,6 @@ int nand_default_bbt(struct mtd_info *mtd) { struct nand_chip *this = mtd->priv; - /* - * Default for AG-AND. We must use a flash based bad block table as the - * devices have factory marked _good_ blocks. Erasing those blocks - * leads to loss of the good / bad information, so we _must_ store this - * information in a good / bad table during startup. - */ - if (this->options & NAND_IS_AND) { - /* Use the default pattern descriptors */ - if (!this->bbt_td) { - this->bbt_td = &bbt_main_descr; - this->bbt_md = &bbt_mirror_descr; - } - this->bbt_options |= NAND_BBT_USE_FLASH; - return nand_scan_bbt(mtd, &agand_flashbased); - } - /* Is a flash based bad block table requested? */ if (this->bbt_options & NAND_BBT_USE_FLASH) { /* Use the default pattern descriptors */ @@ -1375,23 +1338,46 @@ int nand_default_bbt(struct mtd_info *mtd) int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt) { struct nand_chip *this = mtd->priv; - int block; - uint8_t res; + int block, res; - /* Get block number * 2 */ - block = (int)(offs >> (this->bbt_erase_shift - 1)); - res = (this->bbt[block >> 3] >> (block & 0x06)) & 0x03; + block = (int)(offs >> this->bbt_erase_shift); + res = bbt_get_entry(this, block); - MTDDEBUG(MTD_DEBUG_LEVEL2, "nand_isbad_bbt(): bbt info for offs 0x%08x: (block %d) 0x%02x\n", - (unsigned int)offs, block >> 1, res); + pr_debug("nand_isbad_bbt(): bbt info for offs 0x%08x: " + "(block %d) 0x%02x\n", + (unsigned int)offs, block, res); - switch ((int)res) { - case 0x00: + switch (res) { + case BBT_BLOCK_GOOD: return 0; - case 0x01: + case BBT_BLOCK_WORN: return 1; - case 0x02: + case BBT_BLOCK_RESERVED: return allowbbt ? 0 : 1; } return 1; } + +/** + * nand_markbad_bbt - [NAND Interface] Mark a block bad in the BBT + * @mtd: MTD device structure + * @offs: offset of the bad block + */ +int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs) +{ + struct nand_chip *this = mtd->priv; + int block, ret = 0; + + block = (int)(offs >> this->bbt_erase_shift); + + /* Mark bad block in memory */ + bbt_mark_entry(this, block, BBT_BLOCK_WORN); + + /* Update flash-based bad block table */ + if (this->bbt_options & NAND_BBT_USE_FLASH) + ret = nand_update_bbt(mtd, offs); + + return ret; +} + +EXPORT_SYMBOL(nand_scan_bbt); diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index f3f0cb6..2da8d08 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -8,165 +8,172 @@ * published by the Free Software Foundation. * */ - +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/module.h> +#include <linux/mtd/nand.h> +#else #include <common.h> #include <linux/mtd/nand.h> -/* -* Chip ID list -* -* Name. ID code, pagesize, chipsize in MegaByte, eraseblock size, -* options -* -* Pagesize; 0, 256, 512 -* 0 get this information from the extended chip ID -+ 256 256 Byte page size -* 512 512 Byte page size -*/ -const struct nand_flash_dev nand_flash_ids[] = { - -#ifdef CONFIG_MTD_NAND_MUSEUM_IDS - {"NAND 1MiB 5V 8-bit", 0x6e, 256, 1, 0x1000, 0}, - {"NAND 2MiB 5V 8-bit", 0x64, 256, 2, 0x1000, 0}, - {"NAND 4MiB 5V 8-bit", 0x6b, 512, 4, 0x2000, 0}, - {"NAND 1MiB 3,3V 8-bit", 0xe8, 256, 1, 0x1000, 0}, - {"NAND 1MiB 3,3V 8-bit", 0xec, 256, 1, 0x1000, 0}, - {"NAND 2MiB 3,3V 8-bit", 0xea, 256, 2, 0x1000, 0}, - {"NAND 4MiB 3,3V 8-bit", 0xd5, 512, 4, 0x2000, 0}, - {"NAND 4MiB 3,3V 8-bit", 0xe3, 512, 4, 0x2000, 0}, - {"NAND 4MiB 3,3V 8-bit", 0xe5, 512, 4, 0x2000, 0}, - {"NAND 8MiB 3,3V 8-bit", 0xd6, 512, 8, 0x2000, 0}, - - {"NAND 8MiB 1,8V 8-bit", 0x39, 512, 8, 0x2000, 0}, - {"NAND 8MiB 3,3V 8-bit", 0xe6, 512, 8, 0x2000, 0}, - {"NAND 8MiB 1,8V 16-bit", 0x49, 512, 8, 0x2000, NAND_BUSWIDTH_16}, - {"NAND 8MiB 3,3V 16-bit", 0x59, 512, 8, 0x2000, NAND_BUSWIDTH_16}, #endif +#include <linux/sizes.h> - {"NAND 16MiB 1,8V 8-bit", 0x33, 512, 16, 0x4000, 0}, - {"NAND 16MiB 3,3V 8-bit", 0x73, 512, 16, 0x4000, 0}, - {"NAND 16MiB 1,8V 16-bit", 0x43, 512, 16, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 16MiB 3,3V 16-bit", 0x53, 512, 16, 0x4000, NAND_BUSWIDTH_16}, - - {"NAND 32MiB 1,8V 8-bit", 0x35, 512, 32, 0x4000, 0}, - {"NAND 32MiB 3,3V 8-bit", 0x75, 512, 32, 0x4000, 0}, - {"NAND 32MiB 1,8V 16-bit", 0x45, 512, 32, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 32MiB 3,3V 16-bit", 0x55, 512, 32, 0x4000, NAND_BUSWIDTH_16}, - - {"NAND 64MiB 1,8V 8-bit", 0x36, 512, 64, 0x4000, 0}, - {"NAND 64MiB 3,3V 8-bit", 0x76, 512, 64, 0x4000, 0}, - {"NAND 64MiB 1,8V 16-bit", 0x46, 512, 64, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 64MiB 3,3V 16-bit", 0x56, 512, 64, 0x4000, NAND_BUSWIDTH_16}, +#define LP_OPTIONS NAND_SAMSUNG_LP_OPTIONS +#define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16) - {"NAND 128MiB 1,8V 8-bit", 0x78, 512, 128, 0x4000, 0}, - {"NAND 128MiB 1,8V 8-bit", 0x39, 512, 128, 0x4000, 0}, - {"NAND 128MiB 3,3V 8-bit", 0x79, 512, 128, 0x4000, 0}, - {"NAND 128MiB 1,8V 16-bit", 0x72, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 128MiB 1,8V 16-bit", 0x49, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 128MiB 3,3V 16-bit", 0x74, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 128MiB 3,3V 16-bit", 0x59, 512, 128, 0x4000, NAND_BUSWIDTH_16}, +#define SP_OPTIONS NAND_NEED_READRDY +#define SP_OPTIONS16 (SP_OPTIONS | NAND_BUSWIDTH_16) - {"NAND 256MiB 3,3V 8-bit", 0x71, 512, 256, 0x4000, 0}, +/* + * The chip ID list: + * name, device ID, page size, chip size in MiB, eraseblock size, options + * + * If page size and eraseblock size are 0, the sizes are taken from the + * extended chip ID. + */ +struct nand_flash_dev nand_flash_ids[] = { +#ifdef CONFIG_MTD_NAND_MUSEUM_IDS + LEGACY_ID_NAND("NAND 1MiB 5V 8-bit", 0x6e, 1, SZ_4K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 2MiB 5V 8-bit", 0x64, 2, SZ_4K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 1MiB 3,3V 8-bit", 0xe8, 1, SZ_4K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 1MiB 3,3V 8-bit", 0xec, 1, SZ_4K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 2MiB 3,3V 8-bit", 0xea, 2, SZ_4K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 4MiB 3,3V 8-bit", 0xd5, 4, SZ_8K, SP_OPTIONS), + + LEGACY_ID_NAND("NAND 8MiB 3,3V 8-bit", 0xe6, 8, SZ_8K, SP_OPTIONS), +#endif + /* + * Some incompatible NAND chips share device ID's and so must be + * listed by full ID. We list them first so that we can easily identify + * the most specific match. + */ + {"TC58NVG2S0F 4G 3.3V 8-bit", + { .id = {0x98, 0xdc, 0x90, 0x26, 0x76, 0x15, 0x01, 0x08} }, + SZ_4K, SZ_512, SZ_256K, 0, 8, 224, NAND_ECC_INFO(4, SZ_512) }, + {"TC58NVG3S0F 8G 3.3V 8-bit", + { .id = {0x98, 0xd3, 0x90, 0x26, 0x76, 0x15, 0x02, 0x08} }, + SZ_4K, SZ_1K, SZ_256K, 0, 8, 232, NAND_ECC_INFO(4, SZ_512) }, + {"TC58NVG5D2 32G 3.3V 8-bit", + { .id = {0x98, 0xd7, 0x94, 0x32, 0x76, 0x56, 0x09, 0x00} }, + SZ_8K, SZ_4K, SZ_1M, 0, 8, 640, NAND_ECC_INFO(40, SZ_1K) }, + {"TC58NVG6D2 64G 3.3V 8-bit", + { .id = {0x98, 0xde, 0x94, 0x82, 0x76, 0x56, 0x04, 0x20} }, + SZ_8K, SZ_8K, SZ_2M, 0, 8, 640, NAND_ECC_INFO(40, SZ_1K) }, + + LEGACY_ID_NAND("NAND 4MiB 5V 8-bit", 0x6B, 4, SZ_8K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 4MiB 3,3V 8-bit", 0xE3, 4, SZ_8K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 4MiB 3,3V 8-bit", 0xE5, 4, SZ_8K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 8MiB 3,3V 8-bit", 0xD6, 8, SZ_8K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 8MiB 3,3V 8-bit", 0xE6, 8, SZ_8K, SP_OPTIONS), + + LEGACY_ID_NAND("NAND 16MiB 1,8V 8-bit", 0x33, 16, SZ_16K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 16MiB 3,3V 8-bit", 0x73, 16, SZ_16K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 16MiB 1,8V 16-bit", 0x43, 16, SZ_16K, SP_OPTIONS16), + LEGACY_ID_NAND("NAND 16MiB 3,3V 16-bit", 0x53, 16, SZ_16K, SP_OPTIONS16), + + LEGACY_ID_NAND("NAND 32MiB 1,8V 8-bit", 0x35, 32, SZ_16K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 32MiB 3,3V 8-bit", 0x75, 32, SZ_16K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 32MiB 1,8V 16-bit", 0x45, 32, SZ_16K, SP_OPTIONS16), + LEGACY_ID_NAND("NAND 32MiB 3,3V 16-bit", 0x55, 32, SZ_16K, SP_OPTIONS16), + + LEGACY_ID_NAND("NAND 64MiB 1,8V 8-bit", 0x36, 64, SZ_16K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 64MiB 3,3V 8-bit", 0x76, 64, SZ_16K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 64MiB 1,8V 16-bit", 0x46, 64, SZ_16K, SP_OPTIONS16), + LEGACY_ID_NAND("NAND 64MiB 3,3V 16-bit", 0x56, 64, SZ_16K, SP_OPTIONS16), + + LEGACY_ID_NAND("NAND 128MiB 1,8V 8-bit", 0x78, 128, SZ_16K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 128MiB 1,8V 8-bit", 0x39, 128, SZ_16K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 128MiB 3,3V 8-bit", 0x79, 128, SZ_16K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 128MiB 1,8V 16-bit", 0x72, 128, SZ_16K, SP_OPTIONS16), + LEGACY_ID_NAND("NAND 128MiB 1,8V 16-bit", 0x49, 128, SZ_16K, SP_OPTIONS16), + LEGACY_ID_NAND("NAND 128MiB 3,3V 16-bit", 0x74, 128, SZ_16K, SP_OPTIONS16), + LEGACY_ID_NAND("NAND 128MiB 3,3V 16-bit", 0x59, 128, SZ_16K, SP_OPTIONS16), + + LEGACY_ID_NAND("NAND 256MiB 3,3V 8-bit", 0x71, 256, SZ_16K, SP_OPTIONS), /* - * These are the new chips with large page size. The pagesize and the - * erasesize is determined from the extended id bytes + * These are the new chips with large page size. Their page size and + * eraseblock size are determined from the extended ID bytes. */ -#define LP_OPTIONS NAND_SAMSUNG_LP_OPTIONS -#define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16) /* 512 Megabit */ - {"NAND 64MiB 1,8V 8-bit", 0xA2, 0, 64, 0, LP_OPTIONS}, - {"NAND 64MiB 1,8V 8-bit", 0xA0, 0, 64, 0, LP_OPTIONS}, - {"NAND 64MiB 3,3V 8-bit", 0xF2, 0, 64, 0, LP_OPTIONS}, - {"NAND 64MiB 3,3V 8-bit", 0xD0, 0, 64, 0, LP_OPTIONS}, - {"NAND 64MiB 3,3V 8-bit", 0xF0, 0, 64, 0, LP_OPTIONS}, - {"NAND 64MiB 1,8V 16-bit", 0xB2, 0, 64, 0, LP_OPTIONS16}, - {"NAND 64MiB 1,8V 16-bit", 0xB0, 0, 64, 0, LP_OPTIONS16}, - {"NAND 64MiB 3,3V 16-bit", 0xC2, 0, 64, 0, LP_OPTIONS16}, - {"NAND 64MiB 3,3V 16-bit", 0xC0, 0, 64, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 64MiB 1,8V 8-bit", 0xA2, 64, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 64MiB 1,8V 8-bit", 0xA0, 64, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 64MiB 3,3V 8-bit", 0xF2, 64, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 64MiB 3,3V 8-bit", 0xD0, 64, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 64MiB 3,3V 8-bit", 0xF0, 64, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 64MiB 1,8V 16-bit", 0xB2, 64, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 64MiB 1,8V 16-bit", 0xB0, 64, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 64MiB 3,3V 16-bit", 0xC2, 64, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 64MiB 3,3V 16-bit", 0xC0, 64, LP_OPTIONS16), /* 1 Gigabit */ - {"NAND 128MiB 1,8V 8-bit", 0xA1, 0, 128, 0, LP_OPTIONS}, - {"NAND 128MiB 3,3V 8-bit", 0xF1, 0, 128, 0, LP_OPTIONS}, - {"NAND 128MiB 3,3V 8-bit", 0xD1, 0, 128, 0, LP_OPTIONS}, - {"NAND 128MiB 1,8V 16-bit", 0xB1, 0, 128, 0, LP_OPTIONS16}, - {"NAND 128MiB 3,3V 16-bit", 0xC1, 0, 128, 0, LP_OPTIONS16}, - {"NAND 128MiB 1,8V 16-bit", 0xAD, 0, 128, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 128MiB 1,8V 8-bit", 0xA1, 128, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 128MiB 3,3V 8-bit", 0xF1, 128, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 128MiB 3,3V 8-bit", 0xD1, 128, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 128MiB 1,8V 16-bit", 0xB1, 128, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 128MiB 3,3V 16-bit", 0xC1, 128, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 128MiB 1,8V 16-bit", 0xAD, 128, LP_OPTIONS16), /* 2 Gigabit */ - {"NAND 256MiB 1,8V 8-bit", 0xAA, 0, 256, 0, LP_OPTIONS}, - {"NAND 256MiB 3,3V 8-bit", 0xDA, 0, 256, 0, LP_OPTIONS}, - {"NAND 256MiB 1,8V 16-bit", 0xBA, 0, 256, 0, LP_OPTIONS16}, - {"NAND 256MiB 3,3V 16-bit", 0xCA, 0, 256, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 256MiB 1,8V 8-bit", 0xAA, 256, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 256MiB 3,3V 8-bit", 0xDA, 256, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 256MiB 1,8V 16-bit", 0xBA, 256, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 256MiB 3,3V 16-bit", 0xCA, 256, LP_OPTIONS16), /* 4 Gigabit */ - {"NAND 512MiB 1,8V 8-bit", 0xAC, 0, 512, 0, LP_OPTIONS}, - {"NAND 512MiB 3,3V 8-bit", 0xDC, 0, 512, 0, LP_OPTIONS}, - {"NAND 512MiB 1,8V 16-bit", 0xBC, 0, 512, 0, LP_OPTIONS16}, - {"NAND 512MiB 3,3V 16-bit", 0xCC, 0, 512, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 512MiB 1,8V 8-bit", 0xAC, 512, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 512MiB 3,3V 8-bit", 0xDC, 512, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 512MiB 1,8V 16-bit", 0xBC, 512, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 512MiB 3,3V 16-bit", 0xCC, 512, LP_OPTIONS16), /* 8 Gigabit */ - {"NAND 1GiB 1,8V 8-bit", 0xA3, 0, 1024, 0, LP_OPTIONS}, - {"NAND 1GiB 3,3V 8-bit", 0xD3, 0, 1024, 0, LP_OPTIONS}, - {"NAND 1GiB 1,8V 16-bit", 0xB3, 0, 1024, 0, LP_OPTIONS16}, - {"NAND 1GiB 3,3V 16-bit", 0xC3, 0, 1024, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 1GiB 1,8V 8-bit", 0xA3, 1024, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 1GiB 3,3V 8-bit", 0xD3, 1024, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 1GiB 1,8V 16-bit", 0xB3, 1024, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 1GiB 3,3V 16-bit", 0xC3, 1024, LP_OPTIONS16), /* 16 Gigabit */ - {"NAND 2GiB 1,8V 8-bit", 0xA5, 0, 2048, 0, LP_OPTIONS}, - {"NAND 2GiB 3,3V 8-bit", 0xD5, 0, 2048, 0, LP_OPTIONS}, - {"NAND 2GiB 1,8V 16-bit", 0xB5, 0, 2048, 0, LP_OPTIONS16}, - {"NAND 2GiB 3,3V 16-bit", 0xC5, 0, 2048, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 2GiB 1,8V 8-bit", 0xA5, 2048, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 2GiB 3,3V 8-bit", 0xD5, 2048, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 2GiB 1,8V 16-bit", 0xB5, 2048, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 2GiB 3,3V 16-bit", 0xC5, 2048, LP_OPTIONS16), /* 32 Gigabit */ - {"NAND 4GiB 1,8V 8-bit", 0xA7, 0, 4096, 0, LP_OPTIONS}, - {"NAND 4GiB 3,3V 8-bit", 0xD7, 0, 4096, 0, LP_OPTIONS}, - {"NAND 4GiB 1,8V 16-bit", 0xB7, 0, 4096, 0, LP_OPTIONS16}, - {"NAND 4GiB 3,3V 16-bit", 0xC7, 0, 4096, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 4GiB 1,8V 8-bit", 0xA7, 4096, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 4GiB 3,3V 8-bit", 0xD7, 4096, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 4GiB 1,8V 16-bit", 0xB7, 4096, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 4GiB 3,3V 16-bit", 0xC7, 4096, LP_OPTIONS16), /* 64 Gigabit */ - {"NAND 8GiB 1,8V 8-bit", 0xAE, 0, 8192, 0, LP_OPTIONS}, - {"NAND 8GiB 3,3V 8-bit", 0xDE, 0, 8192, 0, LP_OPTIONS}, - {"NAND 8GiB 1,8V 16-bit", 0xBE, 0, 8192, 0, LP_OPTIONS16}, - {"NAND 8GiB 3,3V 16-bit", 0xCE, 0, 8192, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 8GiB 1,8V 8-bit", 0xAE, 8192, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 8GiB 3,3V 8-bit", 0xDE, 8192, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 8GiB 1,8V 16-bit", 0xBE, 8192, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 8GiB 3,3V 16-bit", 0xCE, 8192, LP_OPTIONS16), /* 128 Gigabit */ - {"NAND 16GiB 1,8V 8-bit", 0x1A, 0, 16384, 0, LP_OPTIONS}, - {"NAND 16GiB 3,3V 8-bit", 0x3A, 0, 16384, 0, LP_OPTIONS}, - {"NAND 16GiB 1,8V 16-bit", 0x2A, 0, 16384, 0, LP_OPTIONS16}, - {"NAND 16GiB 3,3V 16-bit", 0x4A, 0, 16384, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 16GiB 1,8V 8-bit", 0x1A, 16384, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 16GiB 3,3V 8-bit", 0x3A, 16384, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 16GiB 1,8V 16-bit", 0x2A, 16384, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 16GiB 3,3V 16-bit", 0x4A, 16384, LP_OPTIONS16), /* 256 Gigabit */ - {"NAND 32GiB 1,8V 8-bit", 0x1C, 0, 32768, 0, LP_OPTIONS}, - {"NAND 32GiB 3,3V 8-bit", 0x3C, 0, 32768, 0, LP_OPTIONS}, - {"NAND 32GiB 1,8V 16-bit", 0x2C, 0, 32768, 0, LP_OPTIONS16}, - {"NAND 32GiB 3,3V 16-bit", 0x4C, 0, 32768, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 32GiB 1,8V 8-bit", 0x1C, 32768, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 32GiB 3,3V 8-bit", 0x3C, 32768, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 32GiB 1,8V 16-bit", 0x2C, 32768, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 32GiB 3,3V 16-bit", 0x4C, 32768, LP_OPTIONS16), /* 512 Gigabit */ - {"NAND 64GiB 1,8V 8-bit", 0x1E, 0, 65536, 0, LP_OPTIONS}, - {"NAND 64GiB 3,3V 8-bit", 0x3E, 0, 65536, 0, LP_OPTIONS}, - {"NAND 64GiB 1,8V 16-bit", 0x2E, 0, 65536, 0, LP_OPTIONS16}, - {"NAND 64GiB 3,3V 16-bit", 0x4E, 0, 65536, 0, LP_OPTIONS16}, + EXTENDED_ID_NAND("NAND 64GiB 1,8V 8-bit", 0x1E, 65536, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 64GiB 3,3V 8-bit", 0x3E, 65536, LP_OPTIONS), + EXTENDED_ID_NAND("NAND 64GiB 1,8V 16-bit", 0x2E, 65536, LP_OPTIONS16), + EXTENDED_ID_NAND("NAND 64GiB 3,3V 16-bit", 0x4E, 65536, LP_OPTIONS16), - /* - * Renesas AND 1 Gigabit. Those chips do not support extended id and - * have a strange page/block layout ! The chosen minimum erasesize is - * 4 * 2 * 2048 = 16384 Byte, as those chips have an array of 4 page - * planes 1 block = 2 pages, but due to plane arrangement the blocks - * 0-3 consists of page 0 + 4,1 + 5, 2 + 6, 3 + 7 Anyway JFFS2 would - * increase the eraseblock size so we chose a combined one which can be - * erased in one go There are more speed improvements for reads and - * writes possible, but not implemented now - */ - {"AND 128MiB 3,3V 8-bit", 0x01, 2048, 128, 0x4000, - NAND_IS_AND | NAND_4PAGE_ARRAY | BBT_AUTO_REFRESH}, - - {NULL,} + {NULL} }; -/* -* Manufacturer ID list -*/ -const struct nand_manufacturers nand_manuf_ids[] = { +/* Manufacturer IDs */ +struct nand_manufacturers nand_manuf_ids[] = { {NAND_MFR_TOSHIBA, "Toshiba"}, {NAND_MFR_SAMSUNG, "Samsung"}, {NAND_MFR_FUJITSU, "Fujitsu"}, @@ -178,5 +185,14 @@ const struct nand_manufacturers nand_manuf_ids[] = { {NAND_MFR_AMD, "AMD/Spansion"}, {NAND_MFR_MACRONIX, "Macronix"}, {NAND_MFR_EON, "Eon"}, + {NAND_MFR_SANDISK, "SanDisk"}, + {NAND_MFR_INTEL, "Intel"}, {0x0, "Unknown"} }; + +EXPORT_SYMBOL(nand_manuf_ids); +EXPORT_SYMBOL(nand_flash_ids); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Thomas Gleixner <tglx@linutronix.de>"); +MODULE_DESCRIPTION("Nand device & manufacturer IDs"); diff --git a/drivers/mtd/nand/nand_util.c b/drivers/mtd/nand/nand_util.c index b292826..024f6fb 100644 --- a/drivers/mtd/nand/nand_util.c +++ b/drivers/mtd/nand/nand_util.c @@ -187,6 +187,9 @@ int nand_erase_opts(nand_info_t *meminfo, const nand_erase_options_t *opts) #ifdef CONFIG_CMD_NAND_LOCK_UNLOCK +#define NAND_CMD_LOCK_TIGHT 0x2c +#define NAND_CMD_LOCK_STATUS 0x7a + /****************************************************************************** * Support for locking / unlocking operations of some NAND devices *****************************************************************************/ diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c index 5510b13..2659595 100644 --- a/drivers/mtd/nand/ndfc.c +++ b/drivers/mtd/nand/ndfc.c @@ -118,6 +118,7 @@ static void ndfc_write_buf(struct mtd_info *mtdinfo, const uint8_t *buf, int len out_be32((u32 *)(base + NDFC_DATA), *p++); } +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) static int ndfc_verify_buf(struct mtd_info *mtdinfo, const uint8_t *buf, int len) { struct nand_chip *this = mtdinfo->priv; @@ -130,6 +131,7 @@ static int ndfc_verify_buf(struct mtd_info *mtdinfo, const uint8_t *buf, int len return 0; } +#endif /* * Read a byte from the NDFC. @@ -205,7 +207,9 @@ int board_nand_init(struct nand_chip *nand) #endif nand->write_buf = ndfc_write_buf; +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) nand->verify_buf = ndfc_verify_buf; +#endif nand->read_byte = ndfc_read_byte; chip++; diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index e33e8d3..03deabc 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -22,6 +22,7 @@ #include <common.h> #include <linux/compat.h> #include <linux/mtd/mtd.h> +#include "linux/mtd/flashchip.h" #include <linux/mtd/onenand.h> #include <asm/io.h> diff --git a/drivers/mtd/onenand/onenand_bbt.c b/drivers/mtd/onenand/onenand_bbt.c index 0267c2c..52509f1 100644 --- a/drivers/mtd/onenand/onenand_bbt.c +++ b/drivers/mtd/onenand/onenand_bbt.c @@ -140,7 +140,6 @@ static inline int onenand_memory_bbt(struct mtd_info *mtd, { unsigned char data_buf[MAX_ONENAND_PAGESIZE]; - bd->options &= ~NAND_BBT_SCANEMPTY; return create_bbt(mtd, data_buf, bd, -1); } diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c index df04c2b..5e56a29 100644 --- a/drivers/mtd/onenand/samsung.c +++ b/drivers/mtd/onenand/samsung.c @@ -15,20 +15,12 @@ #include <linux/compat.h> #include <linux/mtd/mtd.h> #include <linux/mtd/onenand.h> +#include <linux/mtd/flashchip.h> #include <linux/mtd/samsung_onenand.h> #include <asm/io.h> #include <asm/errno.h> -#ifdef ONENAND_DEBUG -#define DPRINTK(format, args...) \ -do { \ - printf("%s[%d]: " format "\n", __func__, __LINE__, ##args); \ -} while (0) -#else -#define DPRINTK(...) do { } while (0) -#endif - #define ONENAND_ERASE_STATUS 0x00 #define ONENAND_MULTI_ERASE_SET 0x01 #define ONENAND_ERASE_START 0x03 diff --git a/drivers/mtd/ubi/Makefile b/drivers/mtd/ubi/Makefile index 56c2823..4807f94 100644 --- a/drivers/mtd/ubi/Makefile +++ b/drivers/mtd/ubi/Makefile @@ -5,6 +5,7 @@ # SPDX-License-Identifier: GPL-2.0+ # -obj-y += build.o vtbl.o vmt.o upd.o kapi.o eba.o io.o wl.o scan.o crc32.o +obj-y += attach.o build.o vtbl.o vmt.o upd.o kapi.o eba.o io.o wl.o crc32.o +obj-$(CONFIG_MTD_UBI_FASTMAP) += fastmap.o obj-y += misc.o obj-y += debug.o diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c new file mode 100644 index 0000000..9fce02e --- /dev/null +++ b/drivers/mtd/ubi/attach.c @@ -0,0 +1,1754 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * SPDX-License-Identifier: GPL-2.0+ + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * UBI attaching sub-system. + * + * This sub-system is responsible for attaching MTD devices and it also + * implements flash media scanning. + * + * The attaching information is represented by a &struct ubi_attach_info' + * object. Information about volumes is represented by &struct ubi_ainf_volume + * objects which are kept in volume RB-tree with root at the @volumes field. + * The RB-tree is indexed by the volume ID. + * + * Logical eraseblocks are represented by &struct ubi_ainf_peb objects. These + * objects are kept in per-volume RB-trees with the root at the corresponding + * &struct ubi_ainf_volume object. To put it differently, we keep an RB-tree of + * per-volume objects and each of these objects is the root of RB-tree of + * per-LEB objects. + * + * Corrupted physical eraseblocks are put to the @corr list, free physical + * eraseblocks are put to the @free list and the physical eraseblock to be + * erased are put to the @erase list. + * + * About corruptions + * ~~~~~~~~~~~~~~~~~ + * + * UBI protects EC and VID headers with CRC-32 checksums, so it can detect + * whether the headers are corrupted or not. Sometimes UBI also protects the + * data with CRC-32, e.g., when it executes the atomic LEB change operation, or + * when it moves the contents of a PEB for wear-leveling purposes. + * + * UBI tries to distinguish between 2 types of corruptions. + * + * 1. Corruptions caused by power cuts. These are expected corruptions and UBI + * tries to handle them gracefully, without printing too many warnings and + * error messages. The idea is that we do not lose important data in these + * cases - we may lose only the data which were being written to the media just + * before the power cut happened, and the upper layers (e.g., UBIFS) are + * supposed to handle such data losses (e.g., by using the FS journal). + * + * When UBI detects a corruption (CRC-32 mismatch) in a PEB, and it looks like + * the reason is a power cut, UBI puts this PEB to the @erase list, and all + * PEBs in the @erase list are scheduled for erasure later. + * + * 2. Unexpected corruptions which are not caused by power cuts. During + * attaching, such PEBs are put to the @corr list and UBI preserves them. + * Obviously, this lessens the amount of available PEBs, and if at some point + * UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly informs + * about such PEBs every time the MTD device is attached. + * + * However, it is difficult to reliably distinguish between these types of + * corruptions and UBI's strategy is as follows (in case of attaching by + * scanning). UBI assumes corruption type 2 if the VID header is corrupted and + * the data area does not contain all 0xFFs, and there were no bit-flips or + * integrity errors (e.g., ECC errors in case of NAND) while reading the data + * area. Otherwise UBI assumes corruption type 1. So the decision criteria + * are as follows. + * o If the data area contains only 0xFFs, there are no data, and it is safe + * to just erase this PEB - this is corruption type 1. + * o If the data area has bit-flips or data integrity errors (ECC errors on + * NAND), it is probably a PEB which was being erased when power cut + * happened, so this is corruption type 1. However, this is just a guess, + * which might be wrong. + * o Otherwise this is corruption type 2. + */ + +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/crc32.h> +#include <linux/random.h> +#else +#include <div64.h> +#include <linux/err.h> +#endif + +#include <linux/math64.h> + +#include <ubi_uboot.h> +#include "ubi.h" + +static int self_check_ai(struct ubi_device *ubi, struct ubi_attach_info *ai); + +/* Temporary variables used during scanning */ +static struct ubi_ec_hdr *ech; +static struct ubi_vid_hdr *vidh; + +/** + * add_to_list - add physical eraseblock to a list. + * @ai: attaching information + * @pnum: physical eraseblock number to add + * @vol_id: the last used volume id for the PEB + * @lnum: the last used LEB number for the PEB + * @ec: erase counter of the physical eraseblock + * @to_head: if not zero, add to the head of the list + * @list: the list to add to + * + * This function allocates a 'struct ubi_ainf_peb' object for physical + * eraseblock @pnum and adds it to the "free", "erase", or "alien" lists. + * It stores the @lnum and @vol_id alongside, which can both be + * %UBI_UNKNOWN if they are not available, not readable, or not assigned. + * If @to_head is not zero, PEB will be added to the head of the list, which + * basically means it will be processed first later. E.g., we add corrupted + * PEBs (corrupted due to power cuts) to the head of the erase list to make + * sure we erase them first and get rid of corruptions ASAP. This function + * returns zero in case of success and a negative error code in case of + * failure. + */ +static int add_to_list(struct ubi_attach_info *ai, int pnum, int vol_id, + int lnum, int ec, int to_head, struct list_head *list) +{ + struct ubi_ainf_peb *aeb; + + if (list == &ai->free) { + dbg_bld("add to free: PEB %d, EC %d", pnum, ec); + } else if (list == &ai->erase) { + dbg_bld("add to erase: PEB %d, EC %d", pnum, ec); + } else if (list == &ai->alien) { + dbg_bld("add to alien: PEB %d, EC %d", pnum, ec); + ai->alien_peb_count += 1; + } else + BUG(); + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + aeb->pnum = pnum; + aeb->vol_id = vol_id; + aeb->lnum = lnum; + aeb->ec = ec; + if (to_head) + list_add(&aeb->u.list, list); + else + list_add_tail(&aeb->u.list, list); + return 0; +} + +/** + * add_corrupted - add a corrupted physical eraseblock. + * @ai: attaching information + * @pnum: physical eraseblock number to add + * @ec: erase counter of the physical eraseblock + * + * This function allocates a 'struct ubi_ainf_peb' object for a corrupted + * physical eraseblock @pnum and adds it to the 'corr' list. The corruption + * was presumably not caused by a power cut. Returns zero in case of success + * and a negative error code in case of failure. + */ +static int add_corrupted(struct ubi_attach_info *ai, int pnum, int ec) +{ + struct ubi_ainf_peb *aeb; + + dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + ai->corr_peb_count += 1; + aeb->pnum = pnum; + aeb->ec = ec; + list_add(&aeb->u.list, &ai->corr); + return 0; +} + +/** + * validate_vid_hdr - check volume identifier header. + * @vid_hdr: the volume identifier header to check + * @av: information about the volume this logical eraseblock belongs to + * @pnum: physical eraseblock number the VID header came from + * + * This function checks that data stored in @vid_hdr is consistent. Returns + * non-zero if an inconsistency was found and zero if not. + * + * Note, UBI does sanity check of everything it reads from the flash media. + * Most of the checks are done in the I/O sub-system. Here we check that the + * information in the VID header is consistent to the information in other VID + * headers of the same volume. + */ +static int validate_vid_hdr(const struct ubi_vid_hdr *vid_hdr, + const struct ubi_ainf_volume *av, int pnum) +{ + int vol_type = vid_hdr->vol_type; + int vol_id = be32_to_cpu(vid_hdr->vol_id); + int used_ebs = be32_to_cpu(vid_hdr->used_ebs); + int data_pad = be32_to_cpu(vid_hdr->data_pad); + + if (av->leb_count != 0) { + int av_vol_type; + + /* + * This is not the first logical eraseblock belonging to this + * volume. Ensure that the data in its VID header is consistent + * to the data in previous logical eraseblock headers. + */ + + if (vol_id != av->vol_id) { + ubi_err("inconsistent vol_id"); + goto bad; + } + + if (av->vol_type == UBI_STATIC_VOLUME) + av_vol_type = UBI_VID_STATIC; + else + av_vol_type = UBI_VID_DYNAMIC; + + if (vol_type != av_vol_type) { + ubi_err("inconsistent vol_type"); + goto bad; + } + + if (used_ebs != av->used_ebs) { + ubi_err("inconsistent used_ebs"); + goto bad; + } + + if (data_pad != av->data_pad) { + ubi_err("inconsistent data_pad"); + goto bad; + } + } + + return 0; + +bad: + ubi_err("inconsistent VID header at PEB %d", pnum); + ubi_dump_vid_hdr(vid_hdr); + ubi_dump_av(av); + return -EINVAL; +} + +/** + * add_volume - add volume to the attaching information. + * @ai: attaching information + * @vol_id: ID of the volume to add + * @pnum: physical eraseblock number + * @vid_hdr: volume identifier header + * + * If the volume corresponding to the @vid_hdr logical eraseblock is already + * present in the attaching information, this function does nothing. Otherwise + * it adds corresponding volume to the attaching information. Returns a pointer + * to the allocated "av" object in case of success and a negative error code in + * case of failure. + */ +static struct ubi_ainf_volume *add_volume(struct ubi_attach_info *ai, + int vol_id, int pnum, + const struct ubi_vid_hdr *vid_hdr) +{ + struct ubi_ainf_volume *av; + struct rb_node **p = &ai->volumes.rb_node, *parent = NULL; + + ubi_assert(vol_id == be32_to_cpu(vid_hdr->vol_id)); + + /* Walk the volume RB-tree to look if this volume is already present */ + while (*p) { + parent = *p; + av = rb_entry(parent, struct ubi_ainf_volume, rb); + + if (vol_id == av->vol_id) + return av; + + if (vol_id > av->vol_id) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + /* The volume is absent - add it */ + av = kmalloc(sizeof(struct ubi_ainf_volume), GFP_KERNEL); + if (!av) + return ERR_PTR(-ENOMEM); + + av->highest_lnum = av->leb_count = 0; + av->vol_id = vol_id; + av->root = RB_ROOT; + av->used_ebs = be32_to_cpu(vid_hdr->used_ebs); + av->data_pad = be32_to_cpu(vid_hdr->data_pad); + av->compat = vid_hdr->compat; + av->vol_type = vid_hdr->vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME + : UBI_STATIC_VOLUME; + if (vol_id > ai->highest_vol_id) + ai->highest_vol_id = vol_id; + + rb_link_node(&av->rb, parent, p); + rb_insert_color(&av->rb, &ai->volumes); + ai->vols_found += 1; + dbg_bld("added volume %d", vol_id); + return av; +} + +/** + * ubi_compare_lebs - find out which logical eraseblock is newer. + * @ubi: UBI device description object + * @aeb: first logical eraseblock to compare + * @pnum: physical eraseblock number of the second logical eraseblock to + * compare + * @vid_hdr: volume identifier header of the second logical eraseblock + * + * This function compares 2 copies of a LEB and informs which one is newer. In + * case of success this function returns a positive value, in case of failure, a + * negative error code is returned. The success return codes use the following + * bits: + * o bit 0 is cleared: the first PEB (described by @aeb) is newer than the + * second PEB (described by @pnum and @vid_hdr); + * o bit 0 is set: the second PEB is newer; + * o bit 1 is cleared: no bit-flips were detected in the newer LEB; + * o bit 1 is set: bit-flips were detected in the newer LEB; + * o bit 2 is cleared: the older LEB is not corrupted; + * o bit 2 is set: the older LEB is corrupted. + */ +int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, + int pnum, const struct ubi_vid_hdr *vid_hdr) +{ + int len, err, second_is_newer, bitflips = 0, corrupted = 0; + uint32_t data_crc, crc; + struct ubi_vid_hdr *vh = NULL; + unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum); + + if (sqnum2 == aeb->sqnum) { + /* + * This must be a really ancient UBI image which has been + * created before sequence numbers support has been added. At + * that times we used 32-bit LEB versions stored in logical + * eraseblocks. That was before UBI got into mainline. We do not + * support these images anymore. Well, those images still work, + * but only if no unclean reboots happened. + */ + ubi_err("unsupported on-flash UBI format"); + return -EINVAL; + } + + /* Obviously the LEB with lower sequence counter is older */ + second_is_newer = (sqnum2 > aeb->sqnum); + + /* + * Now we know which copy is newer. If the copy flag of the PEB with + * newer version is not set, then we just return, otherwise we have to + * check data CRC. For the second PEB we already have the VID header, + * for the first one - we'll need to re-read it from flash. + * + * Note: this may be optimized so that we wouldn't read twice. + */ + + if (second_is_newer) { + if (!vid_hdr->copy_flag) { + /* It is not a copy, so it is newer */ + dbg_bld("second PEB %d is newer, copy_flag is unset", + pnum); + return 1; + } + } else { + if (!aeb->copy_flag) { + /* It is not a copy, so it is newer */ + dbg_bld("first PEB %d is newer, copy_flag is unset", + pnum); + return bitflips << 1; + } + + vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vh) + return -ENOMEM; + + pnum = aeb->pnum; + err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0); + if (err) { + if (err == UBI_IO_BITFLIPS) + bitflips = 1; + else { + ubi_err("VID of PEB %d header is bad, but it was OK earlier, err %d", + pnum, err); + if (err > 0) + err = -EIO; + + goto out_free_vidh; + } + } + + vid_hdr = vh; + } + + /* Read the data of the copy and check the CRC */ + + len = be32_to_cpu(vid_hdr->data_size); + + mutex_lock(&ubi->buf_mutex); + err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, len); + if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) + goto out_unlock; + + data_crc = be32_to_cpu(vid_hdr->data_crc); + crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, len); + if (crc != data_crc) { + dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x", + pnum, crc, data_crc); + corrupted = 1; + bitflips = 0; + second_is_newer = !second_is_newer; + } else { + dbg_bld("PEB %d CRC is OK", pnum); + bitflips = !!err; + } + mutex_unlock(&ubi->buf_mutex); + + ubi_free_vid_hdr(ubi, vh); + + if (second_is_newer) + dbg_bld("second PEB %d is newer, copy_flag is set", pnum); + else + dbg_bld("first PEB %d is newer, copy_flag is set", pnum); + + return second_is_newer | (bitflips << 1) | (corrupted << 2); + +out_unlock: + mutex_unlock(&ubi->buf_mutex); +out_free_vidh: + ubi_free_vid_hdr(ubi, vh); + return err; +} + +/** + * ubi_add_to_av - add used physical eraseblock to the attaching information. + * @ubi: UBI device description object + * @ai: attaching information + * @pnum: the physical eraseblock number + * @ec: erase counter + * @vid_hdr: the volume identifier header + * @bitflips: if bit-flips were detected when this physical eraseblock was read + * + * This function adds information about a used physical eraseblock to the + * 'used' tree of the corresponding volume. The function is rather complex + * because it has to handle cases when this is not the first physical + * eraseblock belonging to the same logical eraseblock, and the newer one has + * to be picked, while the older one has to be dropped. This function returns + * zero in case of success and a negative error code in case of failure. + */ +int ubi_add_to_av(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum, + int ec, const struct ubi_vid_hdr *vid_hdr, int bitflips) +{ + int err, vol_id, lnum; + unsigned long long sqnum; + struct ubi_ainf_volume *av; + struct ubi_ainf_peb *aeb; + struct rb_node **p, *parent = NULL; + + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); + sqnum = be64_to_cpu(vid_hdr->sqnum); + + dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d", + pnum, vol_id, lnum, ec, sqnum, bitflips); + + av = add_volume(ai, vol_id, pnum, vid_hdr); + if (IS_ERR(av)) + return PTR_ERR(av); + + if (ai->max_sqnum < sqnum) + ai->max_sqnum = sqnum; + + /* + * Walk the RB-tree of logical eraseblocks of volume @vol_id to look + * if this is the first instance of this logical eraseblock or not. + */ + p = &av->root.rb_node; + while (*p) { + int cmp_res; + + parent = *p; + aeb = rb_entry(parent, struct ubi_ainf_peb, u.rb); + if (lnum != aeb->lnum) { + if (lnum < aeb->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + continue; + } + + /* + * There is already a physical eraseblock describing the same + * logical eraseblock present. + */ + + dbg_bld("this LEB already exists: PEB %d, sqnum %llu, EC %d", + aeb->pnum, aeb->sqnum, aeb->ec); + + /* + * Make sure that the logical eraseblocks have different + * sequence numbers. Otherwise the image is bad. + * + * However, if the sequence number is zero, we assume it must + * be an ancient UBI image from the era when UBI did not have + * sequence numbers. We still can attach these images, unless + * there is a need to distinguish between old and new + * eraseblocks, in which case we'll refuse the image in + * 'ubi_compare_lebs()'. In other words, we attach old clean + * images, but refuse attaching old images with duplicated + * logical eraseblocks because there was an unclean reboot. + */ + if (aeb->sqnum == sqnum && sqnum != 0) { + ubi_err("two LEBs with same sequence number %llu", + sqnum); + ubi_dump_aeb(aeb, 0); + ubi_dump_vid_hdr(vid_hdr); + return -EINVAL; + } + + /* + * Now we have to drop the older one and preserve the newer + * one. + */ + cmp_res = ubi_compare_lebs(ubi, aeb, pnum, vid_hdr); + if (cmp_res < 0) + return cmp_res; + + if (cmp_res & 1) { + /* + * This logical eraseblock is newer than the one + * found earlier. + */ + err = validate_vid_hdr(vid_hdr, av, pnum); + if (err) + return err; + + err = add_to_list(ai, aeb->pnum, aeb->vol_id, + aeb->lnum, aeb->ec, cmp_res & 4, + &ai->erase); + if (err) + return err; + + aeb->ec = ec; + aeb->pnum = pnum; + aeb->vol_id = vol_id; + aeb->lnum = lnum; + aeb->scrub = ((cmp_res & 2) || bitflips); + aeb->copy_flag = vid_hdr->copy_flag; + aeb->sqnum = sqnum; + + if (av->highest_lnum == lnum) + av->last_data_size = + be32_to_cpu(vid_hdr->data_size); + + return 0; + } else { + /* + * This logical eraseblock is older than the one found + * previously. + */ + return add_to_list(ai, pnum, vol_id, lnum, ec, + cmp_res & 4, &ai->erase); + } + } + + /* + * We've met this logical eraseblock for the first time, add it to the + * attaching information. + */ + + err = validate_vid_hdr(vid_hdr, av, pnum); + if (err) + return err; + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + aeb->ec = ec; + aeb->pnum = pnum; + aeb->vol_id = vol_id; + aeb->lnum = lnum; + aeb->scrub = bitflips; + aeb->copy_flag = vid_hdr->copy_flag; + aeb->sqnum = sqnum; + + if (av->highest_lnum <= lnum) { + av->highest_lnum = lnum; + av->last_data_size = be32_to_cpu(vid_hdr->data_size); + } + + av->leb_count += 1; + rb_link_node(&aeb->u.rb, parent, p); + rb_insert_color(&aeb->u.rb, &av->root); + return 0; +} + +/** + * ubi_find_av - find volume in the attaching information. + * @ai: attaching information + * @vol_id: the requested volume ID + * + * This function returns a pointer to the volume description or %NULL if there + * are no data about this volume in the attaching information. + */ +struct ubi_ainf_volume *ubi_find_av(const struct ubi_attach_info *ai, + int vol_id) +{ + struct ubi_ainf_volume *av; + struct rb_node *p = ai->volumes.rb_node; + + while (p) { + av = rb_entry(p, struct ubi_ainf_volume, rb); + + if (vol_id == av->vol_id) + return av; + + if (vol_id > av->vol_id) + p = p->rb_left; + else + p = p->rb_right; + } + + return NULL; +} + +/** + * ubi_remove_av - delete attaching information about a volume. + * @ai: attaching information + * @av: the volume attaching information to delete + */ +void ubi_remove_av(struct ubi_attach_info *ai, struct ubi_ainf_volume *av) +{ + struct rb_node *rb; + struct ubi_ainf_peb *aeb; + + dbg_bld("remove attaching information about volume %d", av->vol_id); + + while ((rb = rb_first(&av->root))) { + aeb = rb_entry(rb, struct ubi_ainf_peb, u.rb); + rb_erase(&aeb->u.rb, &av->root); + list_add_tail(&aeb->u.list, &ai->erase); + } + + rb_erase(&av->rb, &ai->volumes); + kfree(av); + ai->vols_found -= 1; +} + +/** + * early_erase_peb - erase a physical eraseblock. + * @ubi: UBI device description object + * @ai: attaching information + * @pnum: physical eraseblock number to erase; + * @ec: erase counter value to write (%UBI_UNKNOWN if it is unknown) + * + * This function erases physical eraseblock 'pnum', and writes the erase + * counter header to it. This function should only be used on UBI device + * initialization stages, when the EBA sub-system had not been yet initialized. + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int early_erase_peb(struct ubi_device *ubi, + const struct ubi_attach_info *ai, int pnum, int ec) +{ + int err; + struct ubi_ec_hdr *ec_hdr; + + if ((long long)ec >= UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. Upgrade UBI and use 64-bit + * erase counters internally. + */ + ubi_err("erase counter overflow at PEB %d, EC %d", pnum, ec); + return -EINVAL; + } + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + ec_hdr->ec = cpu_to_be64(ec); + + err = ubi_io_sync_erase(ubi, pnum, 0); + if (err < 0) + goto out_free; + + err = ubi_io_write_ec_hdr(ubi, pnum, ec_hdr); + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * ubi_early_get_peb - get a free physical eraseblock. + * @ubi: UBI device description object + * @ai: attaching information + * + * This function returns a free physical eraseblock. It is supposed to be + * called on the UBI initialization stages when the wear-leveling sub-system is + * not initialized yet. This function picks a physical eraseblocks from one of + * the lists, writes the EC header if it is needed, and removes it from the + * list. + * + * This function returns a pointer to the "aeb" of the found free PEB in case + * of success and an error code in case of failure. + */ +struct ubi_ainf_peb *ubi_early_get_peb(struct ubi_device *ubi, + struct ubi_attach_info *ai) +{ + int err = 0; + struct ubi_ainf_peb *aeb, *tmp_aeb; + + if (!list_empty(&ai->free)) { + aeb = list_entry(ai->free.next, struct ubi_ainf_peb, u.list); + list_del(&aeb->u.list); + dbg_bld("return free PEB %d, EC %d", aeb->pnum, aeb->ec); + return aeb; + } + + /* + * We try to erase the first physical eraseblock from the erase list + * and pick it if we succeed, or try to erase the next one if not. And + * so forth. We don't want to take care about bad eraseblocks here - + * they'll be handled later. + */ + list_for_each_entry_safe(aeb, tmp_aeb, &ai->erase, u.list) { + if (aeb->ec == UBI_UNKNOWN) + aeb->ec = ai->mean_ec; + + err = early_erase_peb(ubi, ai, aeb->pnum, aeb->ec+1); + if (err) + continue; + + aeb->ec += 1; + list_del(&aeb->u.list); + dbg_bld("return PEB %d, EC %d", aeb->pnum, aeb->ec); + return aeb; + } + + ubi_err("no free eraseblocks"); + return ERR_PTR(-ENOSPC); +} + +/** + * check_corruption - check the data area of PEB. + * @ubi: UBI device description object + * @vid_hdr: the (corrupted) VID header of this PEB + * @pnum: the physical eraseblock number to check + * + * This is a helper function which is used to distinguish between VID header + * corruptions caused by power cuts and other reasons. If the PEB contains only + * 0xFF bytes in the data area, the VID header is most probably corrupted + * because of a power cut (%0 is returned in this case). Otherwise, it was + * probably corrupted for some other reasons (%1 is returned in this case). A + * negative error code is returned if a read error occurred. + * + * If the corruption reason was a power cut, UBI can safely erase this PEB. + * Otherwise, it should preserve it to avoid possibly destroying important + * information. + */ +static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, + int pnum) +{ + int err; + + mutex_lock(&ubi->buf_mutex); + memset(ubi->peb_buf, 0x00, ubi->leb_size); + + err = ubi_io_read(ubi, ubi->peb_buf, pnum, ubi->leb_start, + ubi->leb_size); + if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) { + /* + * Bit-flips or integrity errors while reading the data area. + * It is difficult to say for sure what type of corruption is + * this, but presumably a power cut happened while this PEB was + * erased, so it became unstable and corrupted, and should be + * erased. + */ + err = 0; + goto out_unlock; + } + + if (err) + goto out_unlock; + + if (ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->leb_size)) + goto out_unlock; + + ubi_err("PEB %d contains corrupted VID header, and the data does not contain all 0xFF", + pnum); + ubi_err("this may be a non-UBI PEB or a severe VID header corruption which requires manual inspection"); + ubi_dump_vid_hdr(vid_hdr); + pr_err("hexdump of PEB %d offset %d, length %d", + pnum, ubi->leb_start, ubi->leb_size); + ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + ubi->peb_buf, ubi->leb_size, 1); + err = 1; + +out_unlock: + mutex_unlock(&ubi->buf_mutex); + return err; +} + +/** + * scan_peb - scan and process UBI headers of a PEB. + * @ubi: UBI device description object + * @ai: attaching information + * @pnum: the physical eraseblock number + * @vid: The volume ID of the found volume will be stored in this pointer + * @sqnum: The sqnum of the found volume will be stored in this pointer + * + * This function reads UBI headers of PEB @pnum, checks them, and adds + * information about this PEB to the corresponding list or RB-tree in the + * "attaching info" structure. Returns zero if the physical eraseblock was + * successfully handled and a negative error code in case of failure. + */ +static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, + int pnum, int *vid, unsigned long long *sqnum) +{ + long long uninitialized_var(ec); + int err, bitflips = 0, vol_id = -1, ec_err = 0; + + dbg_bld("scan PEB %d", pnum); + + /* Skip bad physical eraseblocks */ + err = ubi_io_is_bad(ubi, pnum); + if (err < 0) + return err; + else if (err) { + ai->bad_peb_count += 1; + return 0; + } + + err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); + if (err < 0) + return err; + switch (err) { + case 0: + break; + case UBI_IO_BITFLIPS: + bitflips = 1; + break; + case UBI_IO_FF: + ai->empty_peb_count += 1; + return add_to_list(ai, pnum, UBI_UNKNOWN, UBI_UNKNOWN, + UBI_UNKNOWN, 0, &ai->erase); + case UBI_IO_FF_BITFLIPS: + ai->empty_peb_count += 1; + return add_to_list(ai, pnum, UBI_UNKNOWN, UBI_UNKNOWN, + UBI_UNKNOWN, 1, &ai->erase); + case UBI_IO_BAD_HDR_EBADMSG: + case UBI_IO_BAD_HDR: + /* + * We have to also look at the VID header, possibly it is not + * corrupted. Set %bitflips flag in order to make this PEB be + * moved and EC be re-created. + */ + ec_err = err; + ec = UBI_UNKNOWN; + bitflips = 1; + break; + default: + ubi_err("'ubi_io_read_ec_hdr()' returned unknown code %d", err); + return -EINVAL; + } + + if (!ec_err) { + int image_seq; + + /* Make sure UBI version is OK */ + if (ech->version != UBI_VERSION) { + ubi_err("this UBI version is %d, image version is %d", + UBI_VERSION, (int)ech->version); + return -EINVAL; + } + + ec = be64_to_cpu(ech->ec); + if (ec > UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. The EC headers have 64 bits + * reserved, but we anyway make use of only 31 bit + * values, as this seems to be enough for any existing + * flash. Upgrade UBI and use 64-bit erase counters + * internally. + */ + ubi_err("erase counter overflow, max is %d", + UBI_MAX_ERASECOUNTER); + ubi_dump_ec_hdr(ech); + return -EINVAL; + } + + /* + * Make sure that all PEBs have the same image sequence number. + * This allows us to detect situations when users flash UBI + * images incorrectly, so that the flash has the new UBI image + * and leftovers from the old one. This feature was added + * relatively recently, and the sequence number was always + * zero, because old UBI implementations always set it to zero. + * For this reasons, we do not panic if some PEBs have zero + * sequence number, while other PEBs have non-zero sequence + * number. + */ + image_seq = be32_to_cpu(ech->image_seq); + if (!ubi->image_seq) + ubi->image_seq = image_seq; + if (image_seq && ubi->image_seq != image_seq) { + ubi_err("bad image sequence number %d in PEB %d, expected %d", + image_seq, pnum, ubi->image_seq); + ubi_dump_ec_hdr(ech); + return -EINVAL; + } + } + + /* OK, we've done with the EC header, let's look at the VID header */ + + err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0); + if (err < 0) + return err; + switch (err) { + case 0: + break; + case UBI_IO_BITFLIPS: + bitflips = 1; + break; + case UBI_IO_BAD_HDR_EBADMSG: + if (ec_err == UBI_IO_BAD_HDR_EBADMSG) + /* + * Both EC and VID headers are corrupted and were read + * with data integrity error, probably this is a bad + * PEB, bit it is not marked as bad yet. This may also + * be a result of power cut during erasure. + */ + ai->maybe_bad_peb_count += 1; + case UBI_IO_BAD_HDR: + if (ec_err) + /* + * Both headers are corrupted. There is a possibility + * that this a valid UBI PEB which has corresponding + * LEB, but the headers are corrupted. However, it is + * impossible to distinguish it from a PEB which just + * contains garbage because of a power cut during erase + * operation. So we just schedule this PEB for erasure. + * + * Besides, in case of NOR flash, we deliberately + * corrupt both headers because NOR flash erasure is + * slow and can start from the end. + */ + err = 0; + else + /* + * The EC was OK, but the VID header is corrupted. We + * have to check what is in the data area. + */ + err = check_corruption(ubi, vidh, pnum); + + if (err < 0) + return err; + else if (!err) + /* This corruption is caused by a power cut */ + err = add_to_list(ai, pnum, UBI_UNKNOWN, + UBI_UNKNOWN, ec, 1, &ai->erase); + else + /* This is an unexpected corruption */ + err = add_corrupted(ai, pnum, ec); + if (err) + return err; + goto adjust_mean_ec; + case UBI_IO_FF_BITFLIPS: + err = add_to_list(ai, pnum, UBI_UNKNOWN, UBI_UNKNOWN, + ec, 1, &ai->erase); + if (err) + return err; + goto adjust_mean_ec; + case UBI_IO_FF: + if (ec_err || bitflips) + err = add_to_list(ai, pnum, UBI_UNKNOWN, + UBI_UNKNOWN, ec, 1, &ai->erase); + else + err = add_to_list(ai, pnum, UBI_UNKNOWN, + UBI_UNKNOWN, ec, 0, &ai->free); + if (err) + return err; + goto adjust_mean_ec; + default: + ubi_err("'ubi_io_read_vid_hdr()' returned unknown code %d", + err); + return -EINVAL; + } + + vol_id = be32_to_cpu(vidh->vol_id); + if (vid) + *vid = vol_id; + if (sqnum) + *sqnum = be64_to_cpu(vidh->sqnum); + if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) { + int lnum = be32_to_cpu(vidh->lnum); + + /* Unsupported internal volume */ + switch (vidh->compat) { + case UBI_COMPAT_DELETE: + if (vol_id != UBI_FM_SB_VOLUME_ID + && vol_id != UBI_FM_DATA_VOLUME_ID) { + ubi_msg("\"delete\" compatible internal volume %d:%d found, will remove it", + vol_id, lnum); + } + err = add_to_list(ai, pnum, vol_id, lnum, + ec, 1, &ai->erase); + if (err) + return err; + return 0; + + case UBI_COMPAT_RO: + ubi_msg("read-only compatible internal volume %d:%d found, switch to read-only mode", + vol_id, lnum); + ubi->ro_mode = 1; + break; + + case UBI_COMPAT_PRESERVE: + ubi_msg("\"preserve\" compatible internal volume %d:%d found", + vol_id, lnum); + err = add_to_list(ai, pnum, vol_id, lnum, + ec, 0, &ai->alien); + if (err) + return err; + return 0; + + case UBI_COMPAT_REJECT: + ubi_err("incompatible internal volume %d:%d found", + vol_id, lnum); + return -EINVAL; + } + } + + if (ec_err) + ubi_warn("valid VID header but corrupted EC header at PEB %d", + pnum); + err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips); + if (err) + return err; + +adjust_mean_ec: + if (!ec_err) { + ai->ec_sum += ec; + ai->ec_count += 1; + if (ec > ai->max_ec) + ai->max_ec = ec; + if (ec < ai->min_ec) + ai->min_ec = ec; + } + + return 0; +} + +/** + * late_analysis - analyze the overall situation with PEB. + * @ubi: UBI device description object + * @ai: attaching information + * + * This is a helper function which takes a look what PEBs we have after we + * gather information about all of them ("ai" is compete). It decides whether + * the flash is empty and should be formatted of whether there are too many + * corrupted PEBs and we should not attach this MTD device. Returns zero if we + * should proceed with attaching the MTD device, and %-EINVAL if we should not. + */ +static int late_analysis(struct ubi_device *ubi, struct ubi_attach_info *ai) +{ + struct ubi_ainf_peb *aeb; + int max_corr, peb_count; + + peb_count = ubi->peb_count - ai->bad_peb_count - ai->alien_peb_count; + max_corr = peb_count / 20 ?: 8; + + /* + * Few corrupted PEBs is not a problem and may be just a result of + * unclean reboots. However, many of them may indicate some problems + * with the flash HW or driver. + */ + if (ai->corr_peb_count) { + ubi_err("%d PEBs are corrupted and preserved", + ai->corr_peb_count); + pr_err("Corrupted PEBs are:"); + list_for_each_entry(aeb, &ai->corr, u.list) + pr_cont(" %d", aeb->pnum); + pr_cont("\n"); + + /* + * If too many PEBs are corrupted, we refuse attaching, + * otherwise, only print a warning. + */ + if (ai->corr_peb_count >= max_corr) { + ubi_err("too many corrupted PEBs, refusing"); + return -EINVAL; + } + } + + if (ai->empty_peb_count + ai->maybe_bad_peb_count == peb_count) { + /* + * All PEBs are empty, or almost all - a couple PEBs look like + * they may be bad PEBs which were not marked as bad yet. + * + * This piece of code basically tries to distinguish between + * the following situations: + * + * 1. Flash is empty, but there are few bad PEBs, which are not + * marked as bad so far, and which were read with error. We + * want to go ahead and format this flash. While formatting, + * the faulty PEBs will probably be marked as bad. + * + * 2. Flash contains non-UBI data and we do not want to format + * it and destroy possibly important information. + */ + if (ai->maybe_bad_peb_count <= 2) { + ai->is_empty = 1; + ubi_msg("empty MTD device detected"); + get_random_bytes(&ubi->image_seq, + sizeof(ubi->image_seq)); + } else { + ubi_err("MTD device is not UBI-formatted and possibly contains non-UBI data - refusing it"); + return -EINVAL; + } + + } + + return 0; +} + +/** + * destroy_av - free volume attaching information. + * @av: volume attaching information + * @ai: attaching information + * + * This function destroys the volume attaching information. + */ +static void destroy_av(struct ubi_attach_info *ai, struct ubi_ainf_volume *av) +{ + struct ubi_ainf_peb *aeb; + struct rb_node *this = av->root.rb_node; + + while (this) { + if (this->rb_left) + this = this->rb_left; + else if (this->rb_right) + this = this->rb_right; + else { + aeb = rb_entry(this, struct ubi_ainf_peb, u.rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &aeb->u.rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + + kmem_cache_free(ai->aeb_slab_cache, aeb); + } + } + kfree(av); +} + +/** + * destroy_ai - destroy attaching information. + * @ai: attaching information + */ +static void destroy_ai(struct ubi_attach_info *ai) +{ + struct ubi_ainf_peb *aeb, *aeb_tmp; + struct ubi_ainf_volume *av; + struct rb_node *rb; + + list_for_each_entry_safe(aeb, aeb_tmp, &ai->alien, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } + list_for_each_entry_safe(aeb, aeb_tmp, &ai->erase, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } + list_for_each_entry_safe(aeb, aeb_tmp, &ai->corr, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } + list_for_each_entry_safe(aeb, aeb_tmp, &ai->free, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } + + /* Destroy the volume RB-tree */ + rb = ai->volumes.rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + av = rb_entry(rb, struct ubi_ainf_volume, rb); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &av->rb) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + destroy_av(ai, av); + } + } + + if (ai->aeb_slab_cache) + kmem_cache_destroy(ai->aeb_slab_cache); + + kfree(ai); +} + +/** + * scan_all - scan entire MTD device. + * @ubi: UBI device description object + * @ai: attach info object + * @start: start scanning at this PEB + * + * This function does full scanning of an MTD device and returns complete + * information about it in form of a "struct ubi_attach_info" object. In case + * of failure, an error code is returned. + */ +static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai, + int start) +{ + int err, pnum; + struct rb_node *rb1, *rb2; + struct ubi_ainf_volume *av; + struct ubi_ainf_peb *aeb; + + err = -ENOMEM; + + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) + return err; + + vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vidh) + goto out_ech; + + for (pnum = start; pnum < ubi->peb_count; pnum++) { + cond_resched(); + + dbg_gen("process PEB %d", pnum); + err = scan_peb(ubi, ai, pnum, NULL, NULL); + if (err < 0) + goto out_vidh; + } + + ubi_msg("scanning is finished"); + + /* Calculate mean erase counter */ + if (ai->ec_count) + ai->mean_ec = div_u64(ai->ec_sum, ai->ec_count); + + err = late_analysis(ubi, ai); + if (err) + goto out_vidh; + + /* + * In case of unknown erase counter we use the mean erase counter + * value. + */ + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) + if (aeb->ec == UBI_UNKNOWN) + aeb->ec = ai->mean_ec; + } + + list_for_each_entry(aeb, &ai->free, u.list) { + if (aeb->ec == UBI_UNKNOWN) + aeb->ec = ai->mean_ec; + } + + list_for_each_entry(aeb, &ai->corr, u.list) + if (aeb->ec == UBI_UNKNOWN) + aeb->ec = ai->mean_ec; + + list_for_each_entry(aeb, &ai->erase, u.list) + if (aeb->ec == UBI_UNKNOWN) + aeb->ec = ai->mean_ec; + + err = self_check_ai(ubi, ai); + if (err) + goto out_vidh; + + ubi_free_vid_hdr(ubi, vidh); + kfree(ech); + + return 0; + +out_vidh: + ubi_free_vid_hdr(ubi, vidh); +out_ech: + kfree(ech); + return err; +} + +#ifdef CONFIG_MTD_UBI_FASTMAP + +/** + * scan_fastmap - try to find a fastmap and attach from it. + * @ubi: UBI device description object + * @ai: attach info object + * + * Returns 0 on success, negative return values indicate an internal + * error. + * UBI_NO_FASTMAP denotes that no fastmap was found. + * UBI_BAD_FASTMAP denotes that the found fastmap was invalid. + */ +static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info *ai) +{ + int err, pnum, fm_anchor = -1; + unsigned long long max_sqnum = 0; + + err = -ENOMEM; + + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) + goto out; + + vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vidh) + goto out_ech; + + for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++) { + int vol_id = -1; + unsigned long long sqnum = -1; + cond_resched(); + + dbg_gen("process PEB %d", pnum); + err = scan_peb(ubi, ai, pnum, &vol_id, &sqnum); + if (err < 0) + goto out_vidh; + + if (vol_id == UBI_FM_SB_VOLUME_ID && sqnum > max_sqnum) { + max_sqnum = sqnum; + fm_anchor = pnum; + } + } + + ubi_free_vid_hdr(ubi, vidh); + kfree(ech); + + if (fm_anchor < 0) + return UBI_NO_FASTMAP; + + return ubi_scan_fastmap(ubi, ai, fm_anchor); + +out_vidh: + ubi_free_vid_hdr(ubi, vidh); +out_ech: + kfree(ech); +out: + return err; +} + +#endif + +static struct ubi_attach_info *alloc_ai(const char *slab_name) +{ + struct ubi_attach_info *ai; + + ai = kzalloc(sizeof(struct ubi_attach_info), GFP_KERNEL); + if (!ai) + return ai; + + INIT_LIST_HEAD(&ai->corr); + INIT_LIST_HEAD(&ai->free); + INIT_LIST_HEAD(&ai->erase); + INIT_LIST_HEAD(&ai->alien); + ai->volumes = RB_ROOT; + ai->aeb_slab_cache = kmem_cache_create(slab_name, + sizeof(struct ubi_ainf_peb), + 0, 0, NULL); + if (!ai->aeb_slab_cache) { + kfree(ai); + ai = NULL; + } + + return ai; +} + +/** + * ubi_attach - attach an MTD device. + * @ubi: UBI device descriptor + * @force_scan: if set to non-zero attach by scanning + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_attach(struct ubi_device *ubi, int force_scan) +{ + int err; + struct ubi_attach_info *ai; + + ai = alloc_ai("ubi_aeb_slab_cache"); + if (!ai) + return -ENOMEM; + +#ifdef CONFIG_MTD_UBI_FASTMAP + /* On small flash devices we disable fastmap in any case. */ + if ((int)mtd_div_by_eb(ubi->mtd->size, ubi->mtd) <= UBI_FM_MAX_START) { + ubi->fm_disabled = 1; + force_scan = 1; + } + + if (force_scan) + err = scan_all(ubi, ai, 0); + else { + err = scan_fast(ubi, ai); + if (err > 0) { + if (err != UBI_NO_FASTMAP) { + destroy_ai(ai); + ai = alloc_ai("ubi_aeb_slab_cache2"); + if (!ai) + return -ENOMEM; + + err = scan_all(ubi, ai, 0); + } else { + err = scan_all(ubi, ai, UBI_FM_MAX_START); + } + } + } +#else + err = scan_all(ubi, ai, 0); +#endif + if (err) + goto out_ai; + + ubi->bad_peb_count = ai->bad_peb_count; + ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count; + ubi->corr_peb_count = ai->corr_peb_count; + ubi->max_ec = ai->max_ec; + ubi->mean_ec = ai->mean_ec; + dbg_gen("max. sequence number: %llu", ai->max_sqnum); + + err = ubi_read_volume_table(ubi, ai); + if (err) + goto out_ai; + + err = ubi_wl_init(ubi, ai); + if (err) + goto out_vtbl; + + err = ubi_eba_init(ubi, ai); + if (err) + goto out_wl; + +#ifdef CONFIG_MTD_UBI_FASTMAP + if (ubi->fm && ubi_dbg_chk_gen(ubi)) { + struct ubi_attach_info *scan_ai; + + scan_ai = alloc_ai("ubi_ckh_aeb_slab_cache"); + if (!scan_ai) { + err = -ENOMEM; + goto out_wl; + } + + err = scan_all(ubi, scan_ai, 0); + if (err) { + destroy_ai(scan_ai); + goto out_wl; + } + + err = self_check_eba(ubi, ai, scan_ai); + destroy_ai(scan_ai); + + if (err) + goto out_wl; + } +#endif + + destroy_ai(ai); + return 0; + +out_wl: + ubi_wl_close(ubi); +out_vtbl: + ubi_free_internal_volumes(ubi); + vfree(ubi->vtbl); +out_ai: + destroy_ai(ai); + return err; +} + +/** + * self_check_ai - check the attaching information. + * @ubi: UBI device description object + * @ai: attaching information + * + * This function returns zero if the attaching information is all right, and a + * negative error code if not or if an error occurred. + */ +static int self_check_ai(struct ubi_device *ubi, struct ubi_attach_info *ai) +{ + int pnum, err, vols_found = 0; + struct rb_node *rb1, *rb2; + struct ubi_ainf_volume *av; + struct ubi_ainf_peb *aeb, *last_aeb; + uint8_t *buf; + + if (!ubi_dbg_chk_gen(ubi)) + return 0; + + /* + * At first, check that attaching information is OK. + */ + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { + int leb_count = 0; + + cond_resched(); + + vols_found += 1; + + if (ai->is_empty) { + ubi_err("bad is_empty flag"); + goto bad_av; + } + + if (av->vol_id < 0 || av->highest_lnum < 0 || + av->leb_count < 0 || av->vol_type < 0 || av->used_ebs < 0 || + av->data_pad < 0 || av->last_data_size < 0) { + ubi_err("negative values"); + goto bad_av; + } + + if (av->vol_id >= UBI_MAX_VOLUMES && + av->vol_id < UBI_INTERNAL_VOL_START) { + ubi_err("bad vol_id"); + goto bad_av; + } + + if (av->vol_id > ai->highest_vol_id) { + ubi_err("highest_vol_id is %d, but vol_id %d is there", + ai->highest_vol_id, av->vol_id); + goto out; + } + + if (av->vol_type != UBI_DYNAMIC_VOLUME && + av->vol_type != UBI_STATIC_VOLUME) { + ubi_err("bad vol_type"); + goto bad_av; + } + + if (av->data_pad > ubi->leb_size / 2) { + ubi_err("bad data_pad"); + goto bad_av; + } + + last_aeb = NULL; + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { + cond_resched(); + + last_aeb = aeb; + leb_count += 1; + + if (aeb->pnum < 0 || aeb->ec < 0) { + ubi_err("negative values"); + goto bad_aeb; + } + + if (aeb->ec < ai->min_ec) { + ubi_err("bad ai->min_ec (%d), %d found", + ai->min_ec, aeb->ec); + goto bad_aeb; + } + + if (aeb->ec > ai->max_ec) { + ubi_err("bad ai->max_ec (%d), %d found", + ai->max_ec, aeb->ec); + goto bad_aeb; + } + + if (aeb->pnum >= ubi->peb_count) { + ubi_err("too high PEB number %d, total PEBs %d", + aeb->pnum, ubi->peb_count); + goto bad_aeb; + } + + if (av->vol_type == UBI_STATIC_VOLUME) { + if (aeb->lnum >= av->used_ebs) { + ubi_err("bad lnum or used_ebs"); + goto bad_aeb; + } + } else { + if (av->used_ebs != 0) { + ubi_err("non-zero used_ebs"); + goto bad_aeb; + } + } + + if (aeb->lnum > av->highest_lnum) { + ubi_err("incorrect highest_lnum or lnum"); + goto bad_aeb; + } + } + + if (av->leb_count != leb_count) { + ubi_err("bad leb_count, %d objects in the tree", + leb_count); + goto bad_av; + } + + if (!last_aeb) + continue; + + aeb = last_aeb; + + if (aeb->lnum != av->highest_lnum) { + ubi_err("bad highest_lnum"); + goto bad_aeb; + } + } + + if (vols_found != ai->vols_found) { + ubi_err("bad ai->vols_found %d, should be %d", + ai->vols_found, vols_found); + goto out; + } + + /* Check that attaching information is correct */ + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { + last_aeb = NULL; + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { + int vol_type; + + cond_resched(); + + last_aeb = aeb; + + err = ubi_io_read_vid_hdr(ubi, aeb->pnum, vidh, 1); + if (err && err != UBI_IO_BITFLIPS) { + ubi_err("VID header is not OK (%d)", err); + if (err > 0) + err = -EIO; + return err; + } + + vol_type = vidh->vol_type == UBI_VID_DYNAMIC ? + UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; + if (av->vol_type != vol_type) { + ubi_err("bad vol_type"); + goto bad_vid_hdr; + } + + if (aeb->sqnum != be64_to_cpu(vidh->sqnum)) { + ubi_err("bad sqnum %llu", aeb->sqnum); + goto bad_vid_hdr; + } + + if (av->vol_id != be32_to_cpu(vidh->vol_id)) { + ubi_err("bad vol_id %d", av->vol_id); + goto bad_vid_hdr; + } + + if (av->compat != vidh->compat) { + ubi_err("bad compat %d", vidh->compat); + goto bad_vid_hdr; + } + + if (aeb->lnum != be32_to_cpu(vidh->lnum)) { + ubi_err("bad lnum %d", aeb->lnum); + goto bad_vid_hdr; + } + + if (av->used_ebs != be32_to_cpu(vidh->used_ebs)) { + ubi_err("bad used_ebs %d", av->used_ebs); + goto bad_vid_hdr; + } + + if (av->data_pad != be32_to_cpu(vidh->data_pad)) { + ubi_err("bad data_pad %d", av->data_pad); + goto bad_vid_hdr; + } + } + + if (!last_aeb) + continue; + + if (av->highest_lnum != be32_to_cpu(vidh->lnum)) { + ubi_err("bad highest_lnum %d", av->highest_lnum); + goto bad_vid_hdr; + } + + if (av->last_data_size != be32_to_cpu(vidh->data_size)) { + ubi_err("bad last_data_size %d", av->last_data_size); + goto bad_vid_hdr; + } + } + + /* + * Make sure that all the physical eraseblocks are in one of the lists + * or trees. + */ + buf = kzalloc(ubi->peb_count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (pnum = 0; pnum < ubi->peb_count; pnum++) { + err = ubi_io_is_bad(ubi, pnum); + if (err < 0) { + kfree(buf); + return err; + } else if (err) + buf[pnum] = 1; + } + + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) + buf[aeb->pnum] = 1; + + list_for_each_entry(aeb, &ai->free, u.list) + buf[aeb->pnum] = 1; + + list_for_each_entry(aeb, &ai->corr, u.list) + buf[aeb->pnum] = 1; + + list_for_each_entry(aeb, &ai->erase, u.list) + buf[aeb->pnum] = 1; + + list_for_each_entry(aeb, &ai->alien, u.list) + buf[aeb->pnum] = 1; + + err = 0; + for (pnum = 0; pnum < ubi->peb_count; pnum++) + if (!buf[pnum]) { + ubi_err("PEB %d is not referred", pnum); + err = 1; + } + + kfree(buf); + if (err) + goto out; + return 0; + +bad_aeb: + ubi_err("bad attaching information about LEB %d", aeb->lnum); + ubi_dump_aeb(aeb, 0); + ubi_dump_av(av); + goto out; + +bad_av: + ubi_err("bad attaching information about volume %d", av->vol_id); + ubi_dump_av(av); + goto out; + +bad_vid_hdr: + ubi_err("bad attaching information about volume %d", av->vol_id); + ubi_dump_av(av); + ubi_dump_vid_hdr(vidh); + +out: + dump_stack(); + return -EINVAL; +} diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 6d86c0b..7094b9c 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -15,56 +15,88 @@ * module load parameters or the kernel boot parameters. If MTD devices were * specified, UBI does not attach any MTD device, but it is possible to do * later using the "UBI control device". - * - * At the moment we only attach UBI devices by scanning, which will become a - * bottleneck when flashes reach certain large size. Then one may improve UBI - * and add other methods, although it does not seem to be easy to do. */ -#ifdef UBI_LINUX -#include <linux/err.h> +#define __UBOOT__ +#ifndef __UBOOT__ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/stringify.h> +#include <linux/namei.h> #include <linux/stat.h> #include <linux/miscdevice.h> #include <linux/log2.h> #include <linux/kthread.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/major.h> +#else +#include <linux/compat.h> #endif +#include <linux/err.h> #include <ubi_uboot.h> +#include <linux/mtd/partitions.h> + #include "ubi.h" +/* Maximum length of the 'mtd=' parameter */ +#define MTD_PARAM_LEN_MAX 64 + +/* Maximum number of comma-separated items in the 'mtd=' parameter */ +#define MTD_PARAM_MAX_COUNT 4 + +/* Maximum value for the number of bad PEBs per 1024 PEBs */ +#define MAX_MTD_UBI_BEB_LIMIT 768 + +#ifdef CONFIG_MTD_UBI_MODULE +#define ubi_is_module() 1 +#else +#define ubi_is_module() 0 +#endif + #if (CONFIG_SYS_MALLOC_LEN < (512 << 10)) #error Malloc area too small for UBI, increase CONFIG_SYS_MALLOC_LEN to >= 512k #endif -/* Maximum length of the 'mtd=' parameter */ -#define MTD_PARAM_LEN_MAX 64 - /** * struct mtd_dev_param - MTD device parameter description data structure. - * @name: MTD device name or number string + * @name: MTD character device node path, MTD device name, or MTD device number + * string * @vid_hdr_offs: VID header offset + * @max_beb_per1024: maximum expected number of bad PEBs per 1024 PEBs */ -struct mtd_dev_param -{ +struct mtd_dev_param { char name[MTD_PARAM_LEN_MAX]; + int ubi_num; int vid_hdr_offs; + int max_beb_per1024; }; /* Numbers of elements set in the @mtd_dev_param array */ -static int mtd_devs = 0; +static int __initdata mtd_devs; /* MTD devices specification parameters */ -static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES]; - +static struct mtd_dev_param __initdata mtd_dev_param[UBI_MAX_DEVICES]; +#ifndef __UBOOT__ +#ifdef CONFIG_MTD_UBI_FASTMAP +/* UBI module parameter to enable fastmap automatically on non-fastmap images */ +static bool fm_autoconvert; +#endif +#else +#ifdef CONFIG_MTD_UBI_FASTMAP +#if !defined(CONFIG_MTD_UBI_FASTMAP_AUTOCONVERT) +#define CONFIG_MTD_UBI_FASTMAP_AUTOCONVERT 0 +#endif +static bool fm_autoconvert = CONFIG_MTD_UBI_FASTMAP_AUTOCONVERT; +#endif +#endif /* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ struct class *ubi_class; -#ifdef UBI_LINUX /* Slab cache for wear-leveling entries */ struct kmem_cache *ubi_wl_entry_slab; +#ifndef __UBOOT__ /* UBI control character device */ static struct miscdevice ubi_ctrl_cdev = { .minor = MISC_DYNAMIC_MINOR, @@ -74,9 +106,13 @@ static struct miscdevice ubi_ctrl_cdev = { #endif /* All UBI devices in system */ +#ifndef __UBOOT__ +static struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; +#else struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; +#endif -#ifdef UBI_LINUX +#ifndef __UBOOT__ /* Serializes UBI devices creations and removals */ DEFINE_MUTEX(ubi_devices_mutex); @@ -84,7 +120,8 @@ DEFINE_MUTEX(ubi_devices_mutex); static DEFINE_SPINLOCK(ubi_devices_lock); /* "Show" method for files in '/<sysfs>/class/ubi/' */ -static ssize_t ubi_version_show(struct class *class, char *buf) +static ssize_t ubi_version_show(struct class *class, + struct class_attribute *attr, char *buf) { return sprintf(buf, "%d\n", UBI_VERSION); } @@ -122,6 +159,112 @@ static struct device_attribute dev_mtd_num = #endif /** + * ubi_volume_notify - send a volume change notification. + * @ubi: UBI device description object + * @vol: volume description object of the changed volume + * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc) + * + * This is a helper function which notifies all subscribers about a volume + * change event (creation, removal, re-sizing, re-naming, updating). Returns + * zero in case of success and a negative error code in case of failure. + */ +int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, int ntype) +{ + struct ubi_notification nt; + + ubi_do_get_device_info(ubi, &nt.di); + ubi_do_get_volume_info(ubi, vol, &nt.vi); + +#ifdef CONFIG_MTD_UBI_FASTMAP + switch (ntype) { + case UBI_VOLUME_ADDED: + case UBI_VOLUME_REMOVED: + case UBI_VOLUME_RESIZED: + case UBI_VOLUME_RENAMED: + if (ubi_update_fastmap(ubi)) { + ubi_err("Unable to update fastmap!"); + ubi_ro_mode(ubi); + } + } +#endif + return blocking_notifier_call_chain(&ubi_notifiers, ntype, &nt); +} + +/** + * ubi_notify_all - send a notification to all volumes. + * @ubi: UBI device description object + * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc) + * @nb: the notifier to call + * + * This function walks all volumes of UBI device @ubi and sends the @ntype + * notification for each volume. If @nb is %NULL, then all registered notifiers + * are called, otherwise only the @nb notifier is called. Returns the number of + * sent notifications. + */ +int ubi_notify_all(struct ubi_device *ubi, int ntype, struct notifier_block *nb) +{ + struct ubi_notification nt; + int i, count = 0; +#ifndef __UBOOT__ + int ret; +#endif + + ubi_do_get_device_info(ubi, &nt.di); + + mutex_lock(&ubi->device_mutex); + for (i = 0; i < ubi->vtbl_slots; i++) { + /* + * Since the @ubi->device is locked, and we are not going to + * change @ubi->volumes, we do not have to lock + * @ubi->volumes_lock. + */ + if (!ubi->volumes[i]) + continue; + + ubi_do_get_volume_info(ubi, ubi->volumes[i], &nt.vi); +#ifndef __UBOOT__ + if (nb) + nb->notifier_call(nb, ntype, &nt); + else + ret = blocking_notifier_call_chain(&ubi_notifiers, ntype, + &nt); +#endif + count += 1; + } + mutex_unlock(&ubi->device_mutex); + + return count; +} + +/** + * ubi_enumerate_volumes - send "add" notification for all existing volumes. + * @nb: the notifier to call + * + * This function walks all UBI devices and volumes and sends the + * %UBI_VOLUME_ADDED notification for each volume. If @nb is %NULL, then all + * registered notifiers are called, otherwise only the @nb notifier is called. + * Returns the number of sent notifications. + */ +int ubi_enumerate_volumes(struct notifier_block *nb) +{ + int i, count = 0; + + /* + * Since the @ubi_devices_mutex is locked, and we are not going to + * change @ubi_devices, we do not have to lock @ubi_devices_lock. + */ + for (i = 0; i < UBI_MAX_DEVICES; i++) { + struct ubi_device *ubi = ubi_devices[i]; + + if (!ubi) + continue; + count += ubi_notify_all(ubi, UBI_VOLUME_ADDED, nb); + } + + return count; +} + +/** * ubi_get_device - get UBI device. * @ubi_num: UBI device number * @@ -159,8 +302,7 @@ void ubi_put_device(struct ubi_device *ubi) } /** - * ubi_get_by_major - get UBI device description object by character device - * major number. + * ubi_get_by_major - get UBI device by character device major number. * @major: major number * * This function is similar to 'ubi_get_device()', but it searches the device @@ -213,7 +355,7 @@ int ubi_major2num(int major) return ubi_num; } -#ifdef UBI_LINUX +#ifndef __UBOOT__ /* "Show" method for files in '/<sysfs>/class/ubi/ubiX/' */ static ssize_t dev_attribute_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -265,28 +407,35 @@ static ssize_t dev_attribute_show(struct device *dev, return ret; } -/* Fake "release" method for UBI devices */ -static void dev_release(struct device *dev) { } +static void dev_release(struct device *dev) +{ + struct ubi_device *ubi = container_of(dev, struct ubi_device, dev); + + kfree(ubi); +} /** * ubi_sysfs_init - initialize sysfs for an UBI device. * @ubi: UBI device description object + * @ref: set to %1 on exit in case of failure if a reference to @ubi->dev was + * taken * * This function returns zero in case of success and a negative error code in * case of failure. */ -static int ubi_sysfs_init(struct ubi_device *ubi) +static int ubi_sysfs_init(struct ubi_device *ubi, int *ref) { int err; ubi->dev.release = dev_release; ubi->dev.devt = ubi->cdev.dev; ubi->dev.class = ubi_class; - sprintf(&ubi->dev.bus_id[0], UBI_NAME_STR"%d", ubi->ubi_num); + dev_set_name(&ubi->dev, UBI_NAME_STR"%d", ubi->ubi_num); err = device_register(&ubi->dev); if (err) return err; + *ref = 1; err = device_create_file(&ubi->dev, &dev_eraseblock_size); if (err) return err; @@ -343,7 +492,7 @@ static void ubi_sysfs_close(struct ubi_device *ubi) #endif /** - * kill_volumes - destroy all volumes. + * kill_volumes - destroy all user volumes. * @ubi: UBI device description object */ static void kill_volumes(struct ubi_device *ubi) @@ -358,17 +507,29 @@ static void kill_volumes(struct ubi_device *ubi) /** * uif_init - initialize user interfaces for an UBI device. * @ubi: UBI device description object + * @ref: set to %1 on exit in case of failure if a reference to @ubi->dev was + * taken, otherwise set to %0 + * + * This function initializes various user interfaces for an UBI device. If the + * initialization fails at an early stage, this function frees all the + * resources it allocated, returns an error, and @ref is set to %0. However, + * if the initialization fails after the UBI device was registered in the + * driver core subsystem, this function takes a reference to @ubi->dev, because + * otherwise the release function ('dev_release()') would free whole @ubi + * object. The @ref argument is set to %1 in this case. The caller has to put + * this reference. * * This function returns zero in case of success and a negative error code in * case of failure. */ -static int uif_init(struct ubi_device *ubi) +static int uif_init(struct ubi_device *ubi, int *ref) { int i, err; -#ifdef UBI_LINUX +#ifndef __UBOOT__ dev_t dev; #endif + *ref = 0; sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); /* @@ -387,7 +548,7 @@ static int uif_init(struct ubi_device *ubi) ubi_assert(MINOR(dev) == 0); cdev_init(&ubi->cdev, &ubi_cdev_operations); - dbg_msg("%s major is %u", ubi->ubi_name, MAJOR(dev)); + dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev)); ubi->cdev.owner = THIS_MODULE; err = cdev_add(&ubi->cdev, dev, 1); @@ -396,7 +557,7 @@ static int uif_init(struct ubi_device *ubi) goto out_unreg; } - err = ubi_sysfs_init(ubi); + err = ubi_sysfs_init(ubi, ref); if (err) goto out_sysfs; @@ -414,6 +575,8 @@ static int uif_init(struct ubi_device *ubi) out_volumes: kill_volumes(ubi); out_sysfs: + if (*ref) + get_device(&ubi->dev); ubi_sysfs_close(ubi); cdev_del(&ubi->cdev); out_unreg: @@ -425,6 +588,10 @@ out_unreg: /** * uif_close - close user interfaces for an UBI device. * @ubi: UBI device description object + * + * Note, since this function un-registers UBI volume device objects (@vol->dev), + * the memory allocated voe the volumes is freed as well (in the release + * function). */ static void uif_close(struct ubi_device *ubi) { @@ -435,58 +602,52 @@ static void uif_close(struct ubi_device *ubi) } /** - * attach_by_scanning - attach an MTD device using scanning method. - * @ubi: UBI device descriptor - * - * This function returns zero in case of success and a negative error code in - * case of failure. - * - * Note, currently this is the only method to attach UBI devices. Hopefully in - * the future we'll have more scalable attaching methods and avoid full media - * scanning. But even in this case scanning will be needed as a fall-back - * attaching method if there are some on-flash table corruptions. + * ubi_free_internal_volumes - free internal volumes. + * @ubi: UBI device description object */ -static int attach_by_scanning(struct ubi_device *ubi) +void ubi_free_internal_volumes(struct ubi_device *ubi) { - int err; - struct ubi_scan_info *si; + int i; - si = ubi_scan(ubi); - if (IS_ERR(si)) - return PTR_ERR(si); + for (i = ubi->vtbl_slots; + i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + kfree(ubi->volumes[i]->eba_tbl); + kfree(ubi->volumes[i]); + } +} - ubi->bad_peb_count = si->bad_peb_count; - ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count; - ubi->max_ec = si->max_ec; - ubi->mean_ec = si->mean_ec; +static int get_bad_peb_limit(const struct ubi_device *ubi, int max_beb_per1024) +{ + int limit, device_pebs; + uint64_t device_size; - err = ubi_read_volume_table(ubi, si); - if (err) - goto out_si; + if (!max_beb_per1024) + return 0; - err = ubi_eba_init_scan(ubi, si); - if (err) - goto out_vtbl; + /* + * Here we are using size of the entire flash chip and + * not just the MTD partition size because the maximum + * number of bad eraseblocks is a percentage of the + * whole device and bad eraseblocks are not fairly + * distributed over the flash chip. So the worst case + * is that all the bad eraseblocks of the chip are in + * the MTD partition we are attaching (ubi->mtd). + */ + device_size = mtd_get_device_size(ubi->mtd); + device_pebs = mtd_div_by_eb(device_size, ubi->mtd); + limit = mult_frac(device_pebs, max_beb_per1024, 1024); - err = ubi_wl_init_scan(ubi, si); - if (err) - goto out_eba; + /* Round it up */ + if (mult_frac(limit, 1024, max_beb_per1024) < device_pebs) + limit += 1; - ubi_scan_destroy_si(si); - return 0; - -out_eba: - ubi_eba_close(ubi); -out_vtbl: - vfree(ubi->vtbl); -out_si: - ubi_scan_destroy_si(si); - return err; + return limit; } /** - * io_init - initialize I/O unit for a given UBI device. + * io_init - initialize I/O sub-system for a given UBI device. * @ubi: UBI device description object + * @max_beb_per1024: maximum expected number of bad PEB per 1024 PEBs * * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are * assumed: @@ -499,8 +660,11 @@ out_si: * This function returns zero in case of success and a negative error code in * case of failure. */ -static int io_init(struct ubi_device *ubi) +static int io_init(struct ubi_device *ubi, int max_beb_per1024) { + dbg_gen("sizeof(struct ubi_ainf_peb) %zu", sizeof(struct ubi_ainf_peb)); + dbg_gen("sizeof(struct ubi_wl_entry) %zu", sizeof(struct ubi_wl_entry)); + if (ubi->mtd->numeraseregions != 0) { /* * Some flashes have several erase regions. Different regions @@ -527,8 +691,15 @@ static int io_init(struct ubi_device *ubi) ubi->peb_count = mtd_div_by_eb(ubi->mtd->size, ubi->mtd); ubi->flash_size = ubi->mtd->size; - if (mtd_can_have_bb(ubi->mtd)) + if (mtd_can_have_bb(ubi->mtd)) { ubi->bad_allowed = 1; + ubi->bad_peb_limit = get_bad_peb_limit(ubi, max_beb_per1024); + } + + if (ubi->mtd->type == MTD_NORFLASH) { + ubi_assert(ubi->mtd->writesize == 1); + ubi->nor_flash = 1; + } ubi->min_io_size = ubi->mtd->writesize; ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; @@ -548,14 +719,28 @@ static int io_init(struct ubi_device *ubi) ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size); ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0); + ubi->max_write_size = ubi->mtd->writebufsize; + /* + * Maximum write size has to be greater or equivalent to min. I/O + * size, and be multiple of min. I/O size. + */ + if (ubi->max_write_size < ubi->min_io_size || + ubi->max_write_size % ubi->min_io_size || + !is_power_of_2(ubi->max_write_size)) { + ubi_err("bad write buffer size %d for %d min. I/O unit", + ubi->max_write_size, ubi->min_io_size); + return -EINVAL; + } + /* Calculate default aligned sizes of EC and VID headers */ ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); - dbg_msg("min_io_size %d", ubi->min_io_size); - dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size); - dbg_msg("ec_hdr_alsize %d", ubi->ec_hdr_alsize); - dbg_msg("vid_hdr_alsize %d", ubi->vid_hdr_alsize); + dbg_gen("min_io_size %d", ubi->min_io_size); + dbg_gen("max_write_size %d", ubi->max_write_size); + dbg_gen("hdrs_min_io_size %d", ubi->hdrs_min_io_size); + dbg_gen("ec_hdr_alsize %d", ubi->ec_hdr_alsize); + dbg_gen("vid_hdr_alsize %d", ubi->vid_hdr_alsize); if (ubi->vid_hdr_offset == 0) /* Default offset */ @@ -569,13 +754,13 @@ static int io_init(struct ubi_device *ubi) } /* Similar for the data offset */ - ubi->leb_start = ubi->vid_hdr_offset + UBI_EC_HDR_SIZE; + ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE; ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); - dbg_msg("vid_hdr_offset %d", ubi->vid_hdr_offset); - dbg_msg("vid_hdr_aloffset %d", ubi->vid_hdr_aloffset); - dbg_msg("vid_hdr_shift %d", ubi->vid_hdr_shift); - dbg_msg("leb_start %d", ubi->leb_start); + dbg_gen("vid_hdr_offset %d", ubi->vid_hdr_offset); + dbg_gen("vid_hdr_aloffset %d", ubi->vid_hdr_aloffset); + dbg_gen("vid_hdr_shift %d", ubi->vid_hdr_shift); + dbg_gen("leb_start %d", ubi->leb_start); /* The shift must be aligned to 32-bit boundary */ if (ubi->vid_hdr_shift % 4) { @@ -595,41 +780,38 @@ static int io_init(struct ubi_device *ubi) } /* + * Set maximum amount of physical erroneous eraseblocks to be 10%. + * Erroneous PEB are those which have read errors. + */ + ubi->max_erroneous = ubi->peb_count / 10; + if (ubi->max_erroneous < 16) + ubi->max_erroneous = 16; + dbg_gen("max_erroneous %d", ubi->max_erroneous); + + /* * It may happen that EC and VID headers are situated in one minimal * I/O unit. In this case we can only accept this UBI image in * read-only mode. */ if (ubi->vid_hdr_offset + UBI_VID_HDR_SIZE <= ubi->hdrs_min_io_size) { - ubi_warn("EC and VID headers are in the same minimal I/O unit, " - "switch to read-only mode"); + ubi_warn("EC and VID headers are in the same minimal I/O unit, switch to read-only mode"); ubi->ro_mode = 1; } ubi->leb_size = ubi->peb_size - ubi->leb_start; if (!(ubi->mtd->flags & MTD_WRITEABLE)) { - ubi_msg("MTD device %d is write-protected, attach in " - "read-only mode", ubi->mtd->index); + ubi_msg("MTD device %d is write-protected, attach in read-only mode", + ubi->mtd->index); ubi->ro_mode = 1; } - ubi_msg("physical eraseblock size: %d bytes (%d KiB)", - ubi->peb_size, ubi->peb_size >> 10); - ubi_msg("logical eraseblock size: %d bytes", ubi->leb_size); - ubi_msg("smallest flash I/O unit: %d", ubi->min_io_size); - if (ubi->hdrs_min_io_size != ubi->min_io_size) - ubi_msg("sub-page size: %d", - ubi->hdrs_min_io_size); - ubi_msg("VID header offset: %d (aligned %d)", - ubi->vid_hdr_offset, ubi->vid_hdr_aloffset); - ubi_msg("data offset: %d", ubi->leb_start); - /* - * Note, ideally, we have to initialize ubi->bad_peb_count here. But + * Note, ideally, we have to initialize @ubi->bad_peb_count here. But * unfortunately, MTD does not provide this information. We should loop * over all physical eraseblocks and invoke mtd->block_is_bad() for - * each physical eraseblock. So, we skip ubi->bad_peb_count - * uninitialized and initialize it after scanning. + * each physical eraseblock. So, we leave @ubi->bad_peb_count + * uninitialized so far. */ return 0; @@ -640,7 +822,7 @@ static int io_init(struct ubi_device *ubi) * @ubi: UBI device description object * @vol_id: ID of the volume to re-size * - * This function re-sizes the volume marked by the @UBI_VTBL_AUTORESIZE_FLG in + * This function re-sizes the volume marked by the %UBI_VTBL_AUTORESIZE_FLG in * the volume table to the largest possible size. See comments in ubi-header.h * for more description of the flag. Returns zero in case of success and a * negative error code in case of failure. @@ -651,9 +833,14 @@ static int autoresize(struct ubi_device *ubi, int vol_id) struct ubi_volume *vol = ubi->volumes[vol_id]; int err, old_reserved_pebs = vol->reserved_pebs; + if (ubi->ro_mode) { + ubi_warn("skip auto-resize because of R/O mode"); + return 0; + } + /* * Clear the auto-resize flag in the volume in-memory copy of the - * volume table, and 'ubi_resize_volume()' will propogate this change + * volume table, and 'ubi_resize_volume()' will propagate this change * to the flash. */ ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG; @@ -662,11 +849,10 @@ static int autoresize(struct ubi_device *ubi, int vol_id) struct ubi_vtbl_record vtbl_rec; /* - * No avalilable PEBs to re-size the volume, clear the flag on + * No available PEBs to re-size the volume, clear the flag on * flash and exit. */ - memcpy(&vtbl_rec, &ubi->vtbl[vol_id], - sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol_id]; err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) ubi_err("cannot clean auto-resize flag for volume %d", @@ -689,23 +875,31 @@ static int autoresize(struct ubi_device *ubi, int vol_id) /** * ubi_attach_mtd_dev - attach an MTD device. - * @mtd_dev: MTD device description object + * @mtd: MTD device description object * @ubi_num: number to assign to the new UBI device * @vid_hdr_offset: VID header offset + * @max_beb_per1024: maximum expected number of bad PEB per 1024 PEBs * * This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in - * which case this function finds a vacant device nubert and assings it + * which case this function finds a vacant device number and assigns it * automatically. Returns the new UBI device number in case of success and a * negative error code in case of failure. * * Note, the invocations of this function has to be serialized by the * @ubi_devices_mutex. */ -int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) +int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, + int vid_hdr_offset, int max_beb_per1024) { struct ubi_device *ubi; - int i, err; + int i, err, ref = 0; + + if (max_beb_per1024 < 0 || max_beb_per1024 > MAX_MTD_UBI_BEB_LIMIT) + return -EINVAL; + + if (!max_beb_per1024) + max_beb_per1024 = CONFIG_MTD_UBI_BEB_LIMIT; /* * Check if we already have the same MTD device attached. @@ -716,7 +910,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) for (i = 0; i < UBI_MAX_DEVICES; i++) { ubi = ubi_devices[i]; if (ubi && mtd->index == ubi->mtd->index) { - dbg_err("mtd%d is already attached to ubi%d", + ubi_err("mtd%d is already attached to ubi%d", mtd->index, i); return -EEXIST; } @@ -731,8 +925,8 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) * no sense to attach emulated MTD devices, so we prohibit this. */ if (mtd->type == MTD_UBIVOLUME) { - ubi_err("refuse attaching mtd%d - it is already emulated on " - "top of UBI", mtd->index); + ubi_err("refuse attaching mtd%d - it is already emulated on top of UBI", + mtd->index); return -EINVAL; } @@ -742,7 +936,8 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) if (!ubi_devices[ubi_num]) break; if (ubi_num == UBI_MAX_DEVICES) { - dbg_err("only %d UBI devices may be created", UBI_MAX_DEVICES); + ubi_err("only %d UBI devices may be created", + UBI_MAX_DEVICES); return -ENFILE; } } else { @@ -751,7 +946,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) /* Make sure ubi_num is not busy */ if (ubi_devices[ubi_num]) { - dbg_err("ubi%d already exists", ubi_num); + ubi_err("ubi%d already exists", ubi_num); return -EEXIST; } } @@ -765,36 +960,61 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) ubi->vid_hdr_offset = vid_hdr_offset; ubi->autoresize_vol_id = -1; +#ifdef CONFIG_MTD_UBI_FASTMAP + ubi->fm_pool.used = ubi->fm_pool.size = 0; + ubi->fm_wl_pool.used = ubi->fm_wl_pool.size = 0; + + /* + * fm_pool.max_size is 5% of the total number of PEBs but it's also + * between UBI_FM_MAX_POOL_SIZE and UBI_FM_MIN_POOL_SIZE. + */ + ubi->fm_pool.max_size = min(((int)mtd_div_by_eb(ubi->mtd->size, + ubi->mtd) / 100) * 5, UBI_FM_MAX_POOL_SIZE); + if (ubi->fm_pool.max_size < UBI_FM_MIN_POOL_SIZE) + ubi->fm_pool.max_size = UBI_FM_MIN_POOL_SIZE; + + ubi->fm_wl_pool.max_size = UBI_FM_WL_POOL_SIZE; + ubi->fm_disabled = !fm_autoconvert; + + if (!ubi->fm_disabled && (int)mtd_div_by_eb(ubi->mtd->size, ubi->mtd) + <= UBI_FM_MAX_START) { + ubi_err("More than %i PEBs are needed for fastmap, sorry.", + UBI_FM_MAX_START); + ubi->fm_disabled = 1; + } + + ubi_msg("default fastmap pool size: %d", ubi->fm_pool.max_size); + ubi_msg("default fastmap WL pool size: %d", ubi->fm_wl_pool.max_size); +#else + ubi->fm_disabled = 1; +#endif mutex_init(&ubi->buf_mutex); mutex_init(&ubi->ckvol_mutex); - mutex_init(&ubi->volumes_mutex); + mutex_init(&ubi->device_mutex); spin_lock_init(&ubi->volumes_lock); + mutex_init(&ubi->fm_mutex); + init_rwsem(&ubi->fm_sem); ubi_msg("attaching mtd%d to ubi%d", mtd->index, ubi_num); - err = io_init(ubi); + err = io_init(ubi, max_beb_per1024); if (err) goto out_free; err = -ENOMEM; - ubi->peb_buf1 = vmalloc(ubi->peb_size); - if (!ubi->peb_buf1) + ubi->peb_buf = vmalloc(ubi->peb_size); + if (!ubi->peb_buf) goto out_free; - ubi->peb_buf2 = vmalloc(ubi->peb_size); - if (!ubi->peb_buf2) - goto out_free; - -#ifdef CONFIG_MTD_UBI_DEBUG - mutex_init(&ubi->dbg_buf_mutex); - ubi->dbg_peb_buf = vmalloc(ubi->peb_size); - if (!ubi->dbg_peb_buf) +#ifdef CONFIG_MTD_UBI_FASTMAP + ubi->fm_size = ubi_calc_fm_size(ubi); + ubi->fm_buf = vzalloc(ubi->fm_size); + if (!ubi->fm_buf) goto out_free; #endif - - err = attach_by_scanning(ubi); + err = ubi_attach(ubi, 0); if (err) { - dbg_err("failed to attach by scanning, error %d", err); + ubi_err("failed to attach mtd%d, error %d", mtd->index, err); goto out_free; } @@ -804,56 +1024,71 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) goto out_detach; } - err = uif_init(ubi); + err = uif_init(ubi, &ref); if (err) goto out_detach; - ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); + err = ubi_debugfs_init_dev(ubi); + if (err) + goto out_uif; + + ubi->bgt_thread = kthread_create(ubi_thread, ubi, "%s", ubi->bgt_name); if (IS_ERR(ubi->bgt_thread)) { err = PTR_ERR(ubi->bgt_thread); ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, err); - goto out_uif; + goto out_debugfs; } - ubi_msg("attached mtd%d to ubi%d", mtd->index, ubi_num); - ubi_msg("MTD device name: \"%s\"", mtd->name); - ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20); - ubi_msg("number of good PEBs: %d", ubi->good_peb_count); - ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count); - ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots); - ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD); - ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT); - ubi_msg("number of user volumes: %d", - ubi->vol_count - UBI_INT_VOL_COUNT); - ubi_msg("available PEBs: %d", ubi->avail_pebs); - ubi_msg("total number of reserved PEBs: %d", ubi->rsvd_pebs); - ubi_msg("number of PEBs reserved for bad PEB handling: %d", - ubi->beb_rsvd_pebs); - ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec); - - /* Enable the background thread */ - if (!DBG_DISABLE_BGT) { - ubi->thread_enabled = 1; - wake_up_process(ubi->bgt_thread); - } + ubi_msg("attached mtd%d (name \"%s\", size %llu MiB) to ubi%d", + mtd->index, mtd->name, ubi->flash_size >> 20, ubi_num); + ubi_msg("PEB size: %d bytes (%d KiB), LEB size: %d bytes", + ubi->peb_size, ubi->peb_size >> 10, ubi->leb_size); + ubi_msg("min./max. I/O unit sizes: %d/%d, sub-page size %d", + ubi->min_io_size, ubi->max_write_size, ubi->hdrs_min_io_size); + ubi_msg("VID header offset: %d (aligned %d), data offset: %d", + ubi->vid_hdr_offset, ubi->vid_hdr_aloffset, ubi->leb_start); + ubi_msg("good PEBs: %d, bad PEBs: %d, corrupted PEBs: %d", + ubi->good_peb_count, ubi->bad_peb_count, ubi->corr_peb_count); + ubi_msg("user volume: %d, internal volumes: %d, max. volumes count: %d", + ubi->vol_count - UBI_INT_VOL_COUNT, UBI_INT_VOL_COUNT, + ubi->vtbl_slots); + ubi_msg("max/mean erase counter: %d/%d, WL threshold: %d, image sequence number: %u", + ubi->max_ec, ubi->mean_ec, CONFIG_MTD_UBI_WL_THRESHOLD, + ubi->image_seq); + ubi_msg("available PEBs: %d, total reserved PEBs: %d, PEBs reserved for bad PEB handling: %d", + ubi->avail_pebs, ubi->rsvd_pebs, ubi->beb_rsvd_pebs); + + /* + * The below lock makes sure we do not race with 'ubi_thread()' which + * checks @ubi->thread_enabled. Otherwise we may fail to wake it up. + */ + spin_lock(&ubi->wl_lock); + ubi->thread_enabled = 1; + wake_up_process(ubi->bgt_thread); + spin_unlock(&ubi->wl_lock); ubi_devices[ubi_num] = ubi; + ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; +out_debugfs: + ubi_debugfs_exit_dev(ubi); out_uif: + get_device(&ubi->dev); + ubi_assert(ref); uif_close(ubi); out_detach: - ubi_eba_close(ubi); ubi_wl_close(ubi); + ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); out_free: - vfree(ubi->peb_buf1); - vfree(ubi->peb_buf2); -#ifdef CONFIG_MTD_UBI_DEBUG - vfree(ubi->dbg_peb_buf); -#endif - kfree(ubi); + vfree(ubi->peb_buf); + vfree(ubi->fm_buf); + if (ref) + put_device(&ubi->dev); + else + kfree(ubi); return err; } @@ -877,13 +1112,13 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) return -EINVAL; - spin_lock(&ubi_devices_lock); - ubi = ubi_devices[ubi_num]; - if (!ubi) { - spin_unlock(&ubi_devices_lock); + ubi = ubi_get_device(ubi_num); + if (!ubi) return -EINVAL; - } + spin_lock(&ubi_devices_lock); + put_device(&ubi->dev); + ubi->ref_count -= 1; if (ubi->ref_count) { if (!anyway) { spin_unlock(&ubi_devices_lock); @@ -897,8 +1132,13 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) spin_unlock(&ubi_devices_lock); ubi_assert(ubi_num == ubi->ubi_num); - dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num); - + ubi_notify_all(ubi, UBI_VOLUME_REMOVED, NULL); + ubi_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num); +#ifdef CONFIG_MTD_UBI_FASTMAP + /* If we don't write a new fastmap at detach time we lose all + * EC updates that have been made since the last written fastmap. */ + ubi_update_fastmap(ubi); +#endif /* * Before freeing anything, we have to stop the background thread to * prevent it from doing anything on this device while we are freeing. @@ -906,29 +1146,73 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) if (ubi->bgt_thread) kthread_stop(ubi->bgt_thread); + /* + * Get a reference to the device in order to prevent 'dev_release()' + * from freeing the @ubi object. + */ + get_device(&ubi->dev); + + ubi_debugfs_exit_dev(ubi); uif_close(ubi); - ubi_eba_close(ubi); + ubi_wl_close(ubi); + ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); put_mtd_device(ubi->mtd); - vfree(ubi->peb_buf1); - vfree(ubi->peb_buf2); -#ifdef CONFIG_MTD_UBI_DEBUG - vfree(ubi->dbg_peb_buf); -#endif + vfree(ubi->peb_buf); + vfree(ubi->fm_buf); ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); - kfree(ubi); + put_device(&ubi->dev); return 0; } +#ifndef __UBOOT__ /** - * find_mtd_device - open an MTD device by its name or number. - * @mtd_dev: name or number of the device + * open_mtd_by_chdev - open an MTD device by its character device node path. + * @mtd_dev: MTD character device node path + * + * This helper function opens an MTD device by its character node device path. + * Returns MTD device description object in case of success and a negative + * error code in case of failure. + */ +static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) +{ + int err, major, minor, mode; + struct path path; + + /* Probably this is an MTD character device node path */ + err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path); + if (err) + return ERR_PTR(err); + + /* MTD device number is defined by the major / minor numbers */ + major = imajor(path.dentry->d_inode); + minor = iminor(path.dentry->d_inode); + mode = path.dentry->d_inode->i_mode; + path_put(&path); + if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode)) + return ERR_PTR(-EINVAL); + + if (minor & 1) + /* + * Just do not think the "/dev/mtdrX" devices support is need, + * so do not support them to avoid doing extra work. + */ + return ERR_PTR(-EINVAL); + + return get_mtd_device(NULL, minor / 2); +} +#endif + +/** + * open_mtd_device - open MTD device by name, character device path, or number. + * @mtd_dev: name, character device node path, or MTD device device number * * This function tries to open and MTD device described by @mtd_dev string, - * which is first treated as an ASCII number, and if it is not true, it is - * treated as MTD device name. Returns MTD device description object in case of - * success and a negative error code in case of failure. + * which is first treated as ASCII MTD device number, and if it is not true, it + * is treated as MTD device name, and if that is also not true, it is treated + * as MTD character device node path. Returns MTD device description object in + * case of success and a negative error code in case of failure. */ static struct mtd_info * __init open_mtd_device(const char *mtd_dev) { @@ -943,13 +1227,22 @@ static struct mtd_info * __init open_mtd_device(const char *mtd_dev) * MTD device name. */ mtd = get_mtd_device_nm(mtd_dev); +#ifndef __UBOOT__ + if (IS_ERR(mtd) && PTR_ERR(mtd) == -ENODEV) + /* Probably this is an MTD character device node path */ + mtd = open_mtd_by_chdev(mtd_dev); +#endif } else mtd = get_mtd_device(NULL, mtd_num); return mtd; } -int __init ubi_init(void) +#ifndef __UBOOT__ +static int __init ubi_init(void) +#else +int ubi_init(void) +#endif { int err, i, k; @@ -982,13 +1275,18 @@ int __init ubi_init(void) goto out_version; } -#ifdef UBI_LINUX ubi_wl_entry_slab = kmem_cache_create("ubi_wl_entry_slab", sizeof(struct ubi_wl_entry), 0, 0, NULL); - if (!ubi_wl_entry_slab) + if (!ubi_wl_entry_slab) { + err = -ENOMEM; goto out_dev_unreg; -#endif + } + + err = ubi_debugfs_init(); + if (err) + goto out_slab; + /* Attach MTD devices */ for (i = 0; i < mtd_devs; i++) { @@ -1000,17 +1298,36 @@ int __init ubi_init(void) mtd = open_mtd_device(p->name); if (IS_ERR(mtd)) { err = PTR_ERR(mtd); - goto out_detach; + ubi_err("cannot open mtd %s, error %d", p->name, err); + /* See comment below re-ubi_is_module(). */ + if (ubi_is_module()) + goto out_detach; + continue; } mutex_lock(&ubi_devices_mutex); - err = ubi_attach_mtd_dev(mtd, UBI_DEV_NUM_AUTO, - p->vid_hdr_offs); + err = ubi_attach_mtd_dev(mtd, p->ubi_num, + p->vid_hdr_offs, p->max_beb_per1024); mutex_unlock(&ubi_devices_mutex); if (err < 0) { - put_mtd_device(mtd); ubi_err("cannot attach mtd%d", mtd->index); - goto out_detach; + put_mtd_device(mtd); + + /* + * Originally UBI stopped initializing on any error. + * However, later on it was found out that this + * behavior is not very good when UBI is compiled into + * the kernel and the MTD devices to attach are passed + * through the command line. Indeed, UBI failure + * stopped whole boot sequence. + * + * To fix this, we changed the behavior for the + * non-module case, but preserved the old behavior for + * the module case, just for compatibility. This is a + * little inconsistent, though. + */ + if (ubi_is_module()) + goto out_detach; } } @@ -1023,23 +1340,26 @@ out_detach: ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); mutex_unlock(&ubi_devices_mutex); } -#ifdef UBI_LINUX + ubi_debugfs_exit(); +out_slab: kmem_cache_destroy(ubi_wl_entry_slab); out_dev_unreg: -#endif misc_deregister(&ubi_ctrl_cdev); out_version: class_remove_file(ubi_class, &ubi_version); out_class: class_destroy(ubi_class); out: - mtd_devs = 0; - ubi_err("UBI error: cannot initialize UBI, error %d", err); + ubi_err("cannot initialize UBI, error %d", err); return err; } -module_init(ubi_init); +late_initcall(ubi_init); -void __exit ubi_exit(void) +#ifndef __UBOOT__ +static void __exit ubi_exit(void) +#else +void ubi_exit(void) +#endif { int i; @@ -1049,17 +1369,16 @@ void __exit ubi_exit(void) ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1); mutex_unlock(&ubi_devices_mutex); } + ubi_debugfs_exit(); kmem_cache_destroy(ubi_wl_entry_slab); misc_deregister(&ubi_ctrl_cdev); class_remove_file(ubi_class, &ubi_version); class_destroy(ubi_class); - mtd_devs = 0; } module_exit(ubi_exit); /** - * bytes_str_to_int - convert a string representing number of bytes to an - * integer. + * bytes_str_to_int - convert a number of bytes string into an integer. * @str: the string to convert * * This function returns positive resulting integer in case of success and a @@ -1071,9 +1390,8 @@ static int __init bytes_str_to_int(const char *str) unsigned long result; result = simple_strtoul(str, &endp, 0); - if (str == endp || result < 0) { - printk(KERN_ERR "UBI error: incorrect bytes count: \"%s\"\n", - str); + if (str == endp || result >= INT_MAX) { + ubi_err("incorrect bytes count: \"%s\"\n", str); return -EINVAL; } @@ -1089,14 +1407,24 @@ static int __init bytes_str_to_int(const char *str) case '\0': break; default: - printk(KERN_ERR "UBI error: incorrect bytes count: \"%s\"\n", - str); + ubi_err("incorrect bytes count: \"%s\"\n", str); return -EINVAL; } return result; } +int kstrtoint(const char *s, unsigned int base, int *res) +{ + unsigned long long tmp; + + tmp = simple_strtoull(s, NULL, base); + if (tmp != (unsigned long long)(int)tmp) + return -ERANGE; + + return (int)tmp; +} + /** * ubi_mtd_param_parse - parse the 'mtd=' UBI parameter. * @val: the parameter value to parse @@ -1105,33 +1433,36 @@ static int __init bytes_str_to_int(const char *str) * This function returns zero in case of success and a negative error code in * case of error. */ -int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) +#ifndef __UBOOT__ +static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) +#else +int ubi_mtd_param_parse(const char *val, struct kernel_param *kp) +#endif { int i, len; struct mtd_dev_param *p; char buf[MTD_PARAM_LEN_MAX]; char *pbuf = &buf[0]; - char *tokens[2] = {NULL, NULL}; + char *tokens[MTD_PARAM_MAX_COUNT], *token; if (!val) return -EINVAL; if (mtd_devs == UBI_MAX_DEVICES) { - printk(KERN_ERR "UBI error: too many parameters, max. is %d\n", - UBI_MAX_DEVICES); + ubi_err("too many parameters, max. is %d\n", + UBI_MAX_DEVICES); return -EINVAL; } len = strnlen(val, MTD_PARAM_LEN_MAX); if (len == MTD_PARAM_LEN_MAX) { - printk(KERN_ERR "UBI error: parameter \"%s\" is too long, " - "max. is %d\n", val, MTD_PARAM_LEN_MAX); + ubi_err("parameter \"%s\" is too long, max. is %d\n", + val, MTD_PARAM_LEN_MAX); return -EINVAL; } if (len == 0) { - printk(KERN_WARNING "UBI warning: empty 'mtd=' parameter - " - "ignored\n"); + pr_warn("UBI warning: empty 'mtd=' parameter - ignored\n"); return 0; } @@ -1141,40 +1472,69 @@ int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) if (buf[len - 1] == '\n') buf[len - 1] = '\0'; - for (i = 0; i < 2; i++) + for (i = 0; i < MTD_PARAM_MAX_COUNT; i++) tokens[i] = strsep(&pbuf, ","); if (pbuf) { - printk(KERN_ERR "UBI error: too many arguments at \"%s\"\n", - val); + ubi_err("too many arguments at \"%s\"\n", val); return -EINVAL; } p = &mtd_dev_param[mtd_devs]; strcpy(&p->name[0], tokens[0]); - if (tokens[1]) - p->vid_hdr_offs = bytes_str_to_int(tokens[1]); + token = tokens[1]; + if (token) { + p->vid_hdr_offs = bytes_str_to_int(token); + + if (p->vid_hdr_offs < 0) + return p->vid_hdr_offs; + } - if (p->vid_hdr_offs < 0) - return p->vid_hdr_offs; + token = tokens[2]; + if (token) { + int err = kstrtoint(token, 10, &p->max_beb_per1024); + + if (err) { + ubi_err("bad value for max_beb_per1024 parameter: %s", + token); + return -EINVAL; + } + } + + token = tokens[3]; + if (token) { + int err = kstrtoint(token, 10, &p->ubi_num); + + if (err) { + ubi_err("bad value for ubi_num parameter: %s", token); + return -EINVAL; + } + } else + p->ubi_num = UBI_DEV_NUM_AUTO; mtd_devs += 1; return 0; } module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000); -MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: " - "mtd=<name|num>[,<vid_hdr_offs>].\n" +MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: mtd=<name|num|path>[,<vid_hdr_offs>[,max_beb_per1024[,ubi_num]]].\n" "Multiple \"mtd\" parameters may be specified.\n" - "MTD devices may be specified by their number or name.\n" - "Optional \"vid_hdr_offs\" parameter specifies UBI VID " - "header position and data starting position to be used " - "by UBI.\n" - "Example: mtd=content,1984 mtd=4 - attach MTD device" - "with name \"content\" using VID header offset 1984, and " - "MTD device number 4 with default VID header offset."); - + "MTD devices may be specified by their number, name, or path to the MTD character device node.\n" + "Optional \"vid_hdr_offs\" parameter specifies UBI VID header position to be used by UBI. (default value if 0)\n" + "Optional \"max_beb_per1024\" parameter specifies the maximum expected bad eraseblock per 1024 eraseblocks. (default value (" + __stringify(CONFIG_MTD_UBI_BEB_LIMIT) ") if 0)\n" + "Optional \"ubi_num\" parameter specifies UBI device number which have to be assigned to the newly created UBI device (assigned automatically by default)\n" + "\n" + "Example 1: mtd=/dev/mtd0 - attach MTD device /dev/mtd0.\n" + "Example 2: mtd=content,1984 mtd=4 - attach MTD device with name \"content\" using VID header offset 1984, and MTD device number 4 with default VID header offset.\n" + "Example 3: mtd=/dev/mtd1,0,25 - attach MTD device /dev/mtd1 using default VID header offset and reserve 25*nand_size_in_blocks/1024 erase blocks for bad block handling.\n" + "Example 4: mtd=/dev/mtd1,0,0,5 - attach MTD device /dev/mtd1 to UBI 5 and using default values for the other fields.\n" + "\t(e.g. if the NAND *chipset* has 4096 PEB, 100 will be reserved for this UBI device)."); +#ifdef CONFIG_MTD_UBI_FASTMAP +module_param(fm_autoconvert, bool, 0644); +MODULE_PARM_DESC(fm_autoconvert, "Set this parameter to enable fastmap automatically on images without a fastmap."); +#endif MODULE_VERSION(__stringify(UBI_VERSION)); MODULE_DESCRIPTION("UBI - Unsorted Block Images"); MODULE_AUTHOR("Artem Bityutskiy"); diff --git a/drivers/mtd/ubi/crc32.c b/drivers/mtd/ubi/crc32.c index f1bebf5..0d65bf4 100644 --- a/drivers/mtd/ubi/crc32.c +++ b/drivers/mtd/ubi/crc32.c @@ -20,7 +20,8 @@ * Version 2. See the file COPYING for more details. */ -#ifdef UBI_LINUX +#define __UBOOT__ +#ifndef __UBOOT__ #include <linux/crc32.h> #include <linux/kernel.h> #include <linux/module.h> @@ -30,7 +31,7 @@ #include <asm/byteorder.h> -#ifdef UBI_LINUX +#ifndef __UBOOT__ #include <linux/slab.h> #include <linux/init.h> #include <asm/atomic.h> @@ -46,7 +47,7 @@ #define tobe(x) (x) #endif #include "crc32table.h" -#ifdef UBI_LINUX +#ifndef __UBOOT__ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); MODULE_DESCRIPTION("Ethernet CRC32 calculations"); MODULE_LICENSE("GPL"); @@ -146,7 +147,7 @@ u32 crc32_le(u32 crc, unsigned char const *p, size_t len) # endif } #endif -#ifdef UBI_LINUX +#ifndef __UBOOT__ /** * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for @@ -379,7 +380,7 @@ EXPORT_SYMBOL(crc32_be); #include <stdlib.h> #include <stdio.h> -#ifdef UBI_LINUX /*Not used at present */ +#ifndef __UBOOT__ static void buf_dump(char const *prefix, unsigned char const *buf, size_t len) { @@ -405,7 +406,7 @@ static void random_garbage(unsigned char *buf, size_t len) *buf++ = (unsigned char) random(); } -#ifdef UBI_LINUX /* Not used at present */ +#ifndef __UBOOT__ static void store_le(u32 x, unsigned char *buf) { buf[0] = (unsigned char) x; diff --git a/drivers/mtd/ubi/crc32table.h b/drivers/mtd/ubi/crc32table.h index 0438af4..02ce6fd 100644 --- a/drivers/mtd/ubi/crc32table.h +++ b/drivers/mtd/ubi/crc32table.h @@ -66,7 +66,7 @@ tole(0xbad03605L), tole(0xcdd70693L), tole(0x54de5729L), tole(0x23d967bfL), tole(0xb3667a2eL), tole(0xc4614ab8L), tole(0x5d681b02L), tole(0x2a6f2b94L), tole(0xb40bbe37L), tole(0xc30c8ea1L), tole(0x5a05df1bL), tole(0x2d02ef8dL) }; -#ifdef UBI_LINUX +#ifndef __UBOOT__ static const u32 crc32table_be[] = { tobe(0x00000000L), tobe(0x04c11db7L), tobe(0x09823b6eL), tobe(0x0d4326d9L), tobe(0x130476dcL), tobe(0x17c56b6bL), tobe(0x1a864db2L), tobe(0x1e475005L), diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index 6c22301..af254da 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -6,175 +6,455 @@ * Author: Artem Bityutskiy (Битюцкий Артём) */ -/* - * Here we keep all the UBI debugging stuff which should normally be disabled - * and compiled-out, but it is extremely helpful when hunting bugs or doing big - * changes. - */ #include <ubi_uboot.h> +#include "ubi.h" +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#endif -#ifdef CONFIG_MTD_UBI_DEBUG_MSG +/** + * ubi_dump_flash - dump a region of flash. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to dump + * @offset: the starting offset within the physical eraseblock to dump + * @len: the length of the region to dump + */ +void ubi_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len) +{ + int err; + size_t read; + void *buf; + loff_t addr = (loff_t)pnum * ubi->peb_size + offset; -#include "ubi.h" + buf = vmalloc(len); + if (!buf) + return; + err = mtd_read(ubi->mtd, addr, len, &read, buf); + if (err && err != -EUCLEAN) { + ubi_err("error %d while reading %d bytes from PEB %d:%d, read %zd bytes", + err, len, pnum, offset, read); + goto out; + } + + ubi_msg("dumping %d bytes of data from PEB %d, offset %d", + len, pnum, offset); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); +out: + vfree(buf); + return; +} /** - * ubi_dbg_dump_ec_hdr - dump an erase counter header. + * ubi_dump_ec_hdr - dump an erase counter header. * @ec_hdr: the erase counter header to dump */ -void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) -{ - dbg_msg("erase counter header dump:"); - dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic)); - dbg_msg("version %d", (int)ec_hdr->version); - dbg_msg("ec %llu", (long long)be64_to_cpu(ec_hdr->ec)); - dbg_msg("vid_hdr_offset %d", be32_to_cpu(ec_hdr->vid_hdr_offset)); - dbg_msg("data_offset %d", be32_to_cpu(ec_hdr->data_offset)); - dbg_msg("hdr_crc %#08x", be32_to_cpu(ec_hdr->hdr_crc)); - dbg_msg("erase counter header hexdump:"); +void ubi_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) +{ + pr_err("Erase counter header dump:\n"); + pr_err("\tmagic %#08x\n", be32_to_cpu(ec_hdr->magic)); + pr_err("\tversion %d\n", (int)ec_hdr->version); + pr_err("\tec %llu\n", (long long)be64_to_cpu(ec_hdr->ec)); + pr_err("\tvid_hdr_offset %d\n", be32_to_cpu(ec_hdr->vid_hdr_offset)); + pr_err("\tdata_offset %d\n", be32_to_cpu(ec_hdr->data_offset)); + pr_err("\timage_seq %d\n", be32_to_cpu(ec_hdr->image_seq)); + pr_err("\thdr_crc %#08x\n", be32_to_cpu(ec_hdr->hdr_crc)); + pr_err("erase counter header hexdump:\n"); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ec_hdr, UBI_EC_HDR_SIZE, 1); } /** - * ubi_dbg_dump_vid_hdr - dump a volume identifier header. + * ubi_dump_vid_hdr - dump a volume identifier header. * @vid_hdr: the volume identifier header to dump */ -void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) -{ - dbg_msg("volume identifier header dump:"); - dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic)); - dbg_msg("version %d", (int)vid_hdr->version); - dbg_msg("vol_type %d", (int)vid_hdr->vol_type); - dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag); - dbg_msg("compat %d", (int)vid_hdr->compat); - dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id)); - dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum)); - dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver)); - dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size)); - dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs)); - dbg_msg("data_pad %d", be32_to_cpu(vid_hdr->data_pad)); - dbg_msg("sqnum %llu", +void ubi_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) +{ + pr_err("Volume identifier header dump:\n"); + pr_err("\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); + pr_err("\tversion %d\n", (int)vid_hdr->version); + pr_err("\tvol_type %d\n", (int)vid_hdr->vol_type); + pr_err("\tcopy_flag %d\n", (int)vid_hdr->copy_flag); + pr_err("\tcompat %d\n", (int)vid_hdr->compat); + pr_err("\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); + pr_err("\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); + pr_err("\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); + pr_err("\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); + pr_err("\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); + pr_err("\tsqnum %llu\n", (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); - dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc)); - dbg_msg("volume identifier header hexdump:"); + pr_err("\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); + pr_err("Volume identifier header hexdump:\n"); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + vid_hdr, UBI_VID_HDR_SIZE, 1); } /** - * ubi_dbg_dump_vol_info- dump volume information. + * ubi_dump_vol_info - dump volume information. * @vol: UBI volume description object */ -void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) -{ - dbg_msg("volume information dump:"); - dbg_msg("vol_id %d", vol->vol_id); - dbg_msg("reserved_pebs %d", vol->reserved_pebs); - dbg_msg("alignment %d", vol->alignment); - dbg_msg("data_pad %d", vol->data_pad); - dbg_msg("vol_type %d", vol->vol_type); - dbg_msg("name_len %d", vol->name_len); - dbg_msg("usable_leb_size %d", vol->usable_leb_size); - dbg_msg("used_ebs %d", vol->used_ebs); - dbg_msg("used_bytes %lld", vol->used_bytes); - dbg_msg("last_eb_bytes %d", vol->last_eb_bytes); - dbg_msg("corrupted %d", vol->corrupted); - dbg_msg("upd_marker %d", vol->upd_marker); +void ubi_dump_vol_info(const struct ubi_volume *vol) +{ + printf("Volume information dump:\n"); + printf("\tvol_id %d\n", vol->vol_id); + printf("\treserved_pebs %d\n", vol->reserved_pebs); + printf("\talignment %d\n", vol->alignment); + printf("\tdata_pad %d\n", vol->data_pad); + printf("\tvol_type %d\n", vol->vol_type); + printf("\tname_len %d\n", vol->name_len); + printf("\tusable_leb_size %d\n", vol->usable_leb_size); + printf("\tused_ebs %d\n", vol->used_ebs); + printf("\tused_bytes %lld\n", vol->used_bytes); + printf("\tlast_eb_bytes %d\n", vol->last_eb_bytes); + printf("\tcorrupted %d\n", vol->corrupted); + printf("\tupd_marker %d\n", vol->upd_marker); if (vol->name_len <= UBI_VOL_NAME_MAX && strnlen(vol->name, vol->name_len + 1) == vol->name_len) { - dbg_msg("name %s", vol->name); + printf("\tname %s\n", vol->name); } else { - dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c", - vol->name[0], vol->name[1], vol->name[2], - vol->name[3], vol->name[4]); + printf("\t1st 5 characters of name: %c%c%c%c%c\n", + vol->name[0], vol->name[1], vol->name[2], + vol->name[3], vol->name[4]); } } /** - * ubi_dbg_dump_vtbl_record - dump a &struct ubi_vtbl_record object. + * ubi_dump_vtbl_record - dump a &struct ubi_vtbl_record object. * @r: the object to dump * @idx: volume table index */ -void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) +void ubi_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) { int name_len = be16_to_cpu(r->name_len); - dbg_msg("volume table record %d dump:", idx); - dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs)); - dbg_msg("alignment %d", be32_to_cpu(r->alignment)); - dbg_msg("data_pad %d", be32_to_cpu(r->data_pad)); - dbg_msg("vol_type %d", (int)r->vol_type); - dbg_msg("upd_marker %d", (int)r->upd_marker); - dbg_msg("name_len %d", name_len); + pr_err("Volume table record %d dump:\n", idx); + pr_err("\treserved_pebs %d\n", be32_to_cpu(r->reserved_pebs)); + pr_err("\talignment %d\n", be32_to_cpu(r->alignment)); + pr_err("\tdata_pad %d\n", be32_to_cpu(r->data_pad)); + pr_err("\tvol_type %d\n", (int)r->vol_type); + pr_err("\tupd_marker %d\n", (int)r->upd_marker); + pr_err("\tname_len %d\n", name_len); if (r->name[0] == '\0') { - dbg_msg("name NULL"); + pr_err("\tname NULL\n"); return; } if (name_len <= UBI_VOL_NAME_MAX && strnlen(&r->name[0], name_len + 1) == name_len) { - dbg_msg("name %s", &r->name[0]); + pr_err("\tname %s\n", &r->name[0]); } else { - dbg_msg("1st 5 characters of the name: %c%c%c%c%c", + pr_err("\t1st 5 characters of name: %c%c%c%c%c\n", r->name[0], r->name[1], r->name[2], r->name[3], r->name[4]); } - dbg_msg("crc %#08x", be32_to_cpu(r->crc)); + pr_err("\tcrc %#08x\n", be32_to_cpu(r->crc)); } /** - * ubi_dbg_dump_sv - dump a &struct ubi_scan_volume object. - * @sv: the object to dump + * ubi_dump_av - dump a &struct ubi_ainf_volume object. + * @av: the object to dump */ -void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) +void ubi_dump_av(const struct ubi_ainf_volume *av) { - dbg_msg("volume scanning information dump:"); - dbg_msg("vol_id %d", sv->vol_id); - dbg_msg("highest_lnum %d", sv->highest_lnum); - dbg_msg("leb_count %d", sv->leb_count); - dbg_msg("compat %d", sv->compat); - dbg_msg("vol_type %d", sv->vol_type); - dbg_msg("used_ebs %d", sv->used_ebs); - dbg_msg("last_data_size %d", sv->last_data_size); - dbg_msg("data_pad %d", sv->data_pad); + pr_err("Volume attaching information dump:\n"); + pr_err("\tvol_id %d\n", av->vol_id); + pr_err("\thighest_lnum %d\n", av->highest_lnum); + pr_err("\tleb_count %d\n", av->leb_count); + pr_err("\tcompat %d\n", av->compat); + pr_err("\tvol_type %d\n", av->vol_type); + pr_err("\tused_ebs %d\n", av->used_ebs); + pr_err("\tlast_data_size %d\n", av->last_data_size); + pr_err("\tdata_pad %d\n", av->data_pad); } /** - * ubi_dbg_dump_seb - dump a &struct ubi_scan_leb object. - * @seb: the object to dump + * ubi_dump_aeb - dump a &struct ubi_ainf_peb object. + * @aeb: the object to dump * @type: object type: 0 - not corrupted, 1 - corrupted */ -void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type) +void ubi_dump_aeb(const struct ubi_ainf_peb *aeb, int type) { - dbg_msg("eraseblock scanning information dump:"); - dbg_msg("ec %d", seb->ec); - dbg_msg("pnum %d", seb->pnum); + pr_err("eraseblock attaching information dump:\n"); + pr_err("\tec %d\n", aeb->ec); + pr_err("\tpnum %d\n", aeb->pnum); if (type == 0) { - dbg_msg("lnum %d", seb->lnum); - dbg_msg("scrub %d", seb->scrub); - dbg_msg("sqnum %llu", seb->sqnum); - dbg_msg("leb_ver %u", seb->leb_ver); + pr_err("\tlnum %d\n", aeb->lnum); + pr_err("\tscrub %d\n", aeb->scrub); + pr_err("\tsqnum %llu\n", aeb->sqnum); } } /** - * ubi_dbg_dump_mkvol_req - dump a &struct ubi_mkvol_req object. + * ubi_dump_mkvol_req - dump a &struct ubi_mkvol_req object. * @req: the object to dump */ -void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req) +void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req) { char nm[17]; - dbg_msg("volume creation request dump:"); - dbg_msg("vol_id %d", req->vol_id); - dbg_msg("alignment %d", req->alignment); - dbg_msg("bytes %lld", (long long)req->bytes); - dbg_msg("vol_type %d", req->vol_type); - dbg_msg("name_len %d", req->name_len); + pr_err("Volume creation request dump:\n"); + pr_err("\tvol_id %d\n", req->vol_id); + pr_err("\talignment %d\n", req->alignment); + pr_err("\tbytes %lld\n", (long long)req->bytes); + pr_err("\tvol_type %d\n", req->vol_type); + pr_err("\tname_len %d\n", req->name_len); memcpy(nm, req->name, 16); nm[16] = 0; - dbg_msg("the 1st 16 characters of the name: %s", nm); + pr_err("\t1st 16 characters of name: %s\n", nm); } -#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ +#ifndef __UBOOT__ +/* + * Root directory for UBI stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular UBI devices. + */ +static struct dentry *dfs_rootdir; + +/** + * ubi_debugfs_init - create UBI debugfs directory. + * + * Create UBI debugfs directory. Returns zero in case of success and a negative + * error code in case of failure. + */ +int ubi_debugfs_init(void) +{ + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + dfs_rootdir = debugfs_create_dir("ubi", NULL); + if (IS_ERR_OR_NULL(dfs_rootdir)) { + int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir); + + ubi_err("cannot create \"ubi\" debugfs directory, error %d\n", + err); + return err; + } + + return 0; +} + +/** + * ubi_debugfs_exit - remove UBI debugfs directory. + */ +void ubi_debugfs_exit(void) +{ + if (IS_ENABLED(CONFIG_DEBUG_FS)) + debugfs_remove(dfs_rootdir); +} + +/* Read an UBI debugfs file */ +static ssize_t dfs_file_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + unsigned long ubi_num = (unsigned long)file->private_data; + struct dentry *dent = file->f_path.dentry; + struct ubi_device *ubi; + struct ubi_debug_info *d; + char buf[3]; + int val; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + d = &ubi->dbg; + + if (dent == d->dfs_chk_gen) + val = d->chk_gen; + else if (dent == d->dfs_chk_io) + val = d->chk_io; + else if (dent == d->dfs_disable_bgt) + val = d->disable_bgt; + else if (dent == d->dfs_emulate_bitflips) + val = d->emulate_bitflips; + else if (dent == d->dfs_emulate_io_failures) + val = d->emulate_io_failures; + else { + count = -EINVAL; + goto out; + } + + if (val) + buf[0] = '1'; + else + buf[0] = '0'; + buf[1] = '\n'; + buf[2] = 0x00; + + count = simple_read_from_buffer(user_buf, count, ppos, buf, 2); + +out: + ubi_put_device(ubi); + return count; +} + +/* Write an UBI debugfs file */ +static ssize_t dfs_file_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + unsigned long ubi_num = (unsigned long)file->private_data; + struct dentry *dent = file->f_path.dentry; + struct ubi_device *ubi; + struct ubi_debug_info *d; + size_t buf_size; + char buf[8]; + int val; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + d = &ubi->dbg; + + buf_size = min_t(size_t, count, (sizeof(buf) - 1)); + if (copy_from_user(buf, user_buf, buf_size)) { + count = -EFAULT; + goto out; + } + + if (buf[0] == '1') + val = 1; + else if (buf[0] == '0') + val = 0; + else { + count = -EINVAL; + goto out; + } + + if (dent == d->dfs_chk_gen) + d->chk_gen = val; + else if (dent == d->dfs_chk_io) + d->chk_io = val; + else if (dent == d->dfs_disable_bgt) + d->disable_bgt = val; + else if (dent == d->dfs_emulate_bitflips) + d->emulate_bitflips = val; + else if (dent == d->dfs_emulate_io_failures) + d->emulate_io_failures = val; + else + count = -EINVAL; + +out: + ubi_put_device(ubi); + return count; +} + +/* File operations for all UBI debugfs files */ +static const struct file_operations dfs_fops = { + .read = dfs_file_read, + .write = dfs_file_write, + .open = simple_open, + .llseek = no_llseek, + .owner = THIS_MODULE, +}; + +/** + * ubi_debugfs_init_dev - initialize debugfs for an UBI device. + * @ubi: UBI device description object + * + * This function creates all debugfs files for UBI device @ubi. Returns zero in + * case of success and a negative error code in case of failure. + */ +int ubi_debugfs_init_dev(struct ubi_device *ubi) +{ + int err, n; + unsigned long ubi_num = ubi->ubi_num; + const char *fname; + struct dentry *dent; + struct ubi_debug_info *d = &ubi->dbg; + + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, + ubi->ubi_num); + if (n == UBI_DFS_DIR_LEN) { + /* The array size is too small */ + fname = UBI_DFS_DIR_NAME; + dent = ERR_PTR(-EINVAL); + goto out; + } + + fname = d->dfs_dir_name; + dent = debugfs_create_dir(fname, dfs_rootdir); + if (IS_ERR_OR_NULL(dent)) + goto out; + d->dfs_dir = dent; + + fname = "chk_gen"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_gen = dent; + + fname = "chk_io"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_io = dent; + + fname = "tst_disable_bgt"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_disable_bgt = dent; + + fname = "tst_emulate_bitflips"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_emulate_bitflips = dent; + + fname = "tst_emulate_io_failures"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_emulate_io_failures = dent; + + return 0; + +out_remove: + debugfs_remove_recursive(d->dfs_dir); +out: + err = dent ? PTR_ERR(dent) : -ENODEV; + ubi_err("cannot create \"%s\" debugfs file or directory, error %d\n", + fname, err); + return err; +} + +/** + * dbg_debug_exit_dev - free all debugfs files corresponding to device @ubi + * @ubi: UBI device description object + */ +void ubi_debugfs_exit_dev(struct ubi_device *ubi) +{ + if (IS_ENABLED(CONFIG_DEBUG_FS)) + debugfs_remove_recursive(ubi->dbg.dfs_dir); +} +#else +int ubi_debugfs_init(void) +{ + return 0; +} + +void ubi_debugfs_exit(void) +{ +} + +int ubi_debugfs_init_dev(struct ubi_device *ubi) +{ + return 0; +} + +void ubi_debugfs_exit_dev(struct ubi_device *ubi) +{ +} +#endif diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 222b2b8..980eb11 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -9,132 +9,113 @@ #ifndef __UBI_DEBUG_H__ #define __UBI_DEBUG_H__ -#ifdef CONFIG_MTD_UBI_DEBUG -#ifdef UBI_LINUX -#include <linux/random.h> -#endif - -#define ubi_assert(expr) BUG_ON(!(expr)) -#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) -#else -#define ubi_assert(expr) ({}) -#define dbg_err(fmt, ...) ({}) -#endif - -#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT -#define DBG_DISABLE_BGT 1 -#else -#define DBG_DISABLE_BGT 0 -#endif - -#ifdef CONFIG_MTD_UBI_DEBUG_MSG -/* Generic debugging message */ -#define dbg_msg(fmt, ...) \ - printk(KERN_DEBUG "UBI DBG: %s: " fmt "\n", \ - __FUNCTION__, ##__VA_ARGS__) - -#define ubi_dbg_dump_stack() dump_stack() - -struct ubi_ec_hdr; -struct ubi_vid_hdr; -struct ubi_volume; -struct ubi_vtbl_record; -struct ubi_scan_volume; -struct ubi_scan_leb; -struct ubi_mkvol_req; - -void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr); -void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr); -void ubi_dbg_dump_vol_info(const struct ubi_volume *vol); -void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx); -void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv); -void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); -void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); - -#else - -#define dbg_msg(fmt, ...) ({}) -#define ubi_dbg_dump_stack() ({}) -#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) -#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) -#define ubi_dbg_dump_vol_info(vol) ({}) -#define ubi_dbg_dump_vtbl_record(r, idx) ({}) -#define ubi_dbg_dump_sv(sv) ({}) -#define ubi_dbg_dump_seb(seb, type) ({}) -#define ubi_dbg_dump_mkvol_req(req) ({}) - -#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ - -#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA -/* Messages from the eraseblock association unit */ -#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#else -#define dbg_eba(fmt, ...) ({}) -#endif - -#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL -/* Messages from the wear-leveling unit */ -#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#else -#define dbg_wl(fmt, ...) ({}) -#endif +void ubi_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len); +void ubi_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr); +void ubi_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr); -#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO -/* Messages from the input/output unit */ -#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#else -#define dbg_io(fmt, ...) ({}) +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/random.h> #endif -#ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD +#define ubi_assert(expr) do { \ + if (unlikely(!(expr))) { \ + pr_crit("UBI assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ + dump_stack(); \ + } \ +} while (0) + +#define ubi_dbg_print_hex_dump(l, ps, pt, r, g, b, len, a) \ + print_hex_dump(l, ps, pt, r, g, b, len, a) + +#define ubi_dbg_msg(type, fmt, ...) \ + pr_debug("UBI DBG " type " (pid %d): " fmt "\n", current->pid, \ + ##__VA_ARGS__) + +/* General debugging messages */ +#define dbg_gen(fmt, ...) ubi_dbg_msg("gen", fmt, ##__VA_ARGS__) +/* Messages from the eraseblock association sub-system */ +#define dbg_eba(fmt, ...) ubi_dbg_msg("eba", fmt, ##__VA_ARGS__) +/* Messages from the wear-leveling sub-system */ +#define dbg_wl(fmt, ...) ubi_dbg_msg("wl", fmt, ##__VA_ARGS__) +/* Messages from the input/output sub-system */ +#define dbg_io(fmt, ...) ubi_dbg_msg("io", fmt, ##__VA_ARGS__) /* Initialization and build messages */ -#define dbg_bld(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#else -#define dbg_bld(fmt, ...) ({}) -#endif +#define dbg_bld(fmt, ...) ubi_dbg_msg("bld", fmt, ##__VA_ARGS__) + +void ubi_dump_vol_info(const struct ubi_volume *vol); +void ubi_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx); +void ubi_dump_av(const struct ubi_ainf_volume *av); +void ubi_dump_aeb(const struct ubi_ainf_peb *aeb, int type); +void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req); +int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, + int len); +int ubi_debugfs_init(void); +void ubi_debugfs_exit(void); +int ubi_debugfs_init_dev(struct ubi_device *ubi); +void ubi_debugfs_exit_dev(struct ubi_device *ubi); + +/** + * ubi_dbg_is_bgt_disabled - if the background thread is disabled. + * @ubi: UBI device description object + * + * Returns non-zero if the UBI background thread is disabled for testing + * purposes. + */ +static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) +{ + return ubi->dbg.disable_bgt; +} -#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS /** * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. + * @ubi: UBI device description object * * Returns non-zero if a bit-flip should be emulated, otherwise returns zero. */ -static inline int ubi_dbg_is_bitflip(void) +static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { - return !(random32() % 200); + if (ubi->dbg.emulate_bitflips) + return !(prandom_u32() % 200); + return 0; } -#else -#define ubi_dbg_is_bitflip() 0 -#endif -#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES /** * ubi_dbg_is_write_failure - if it is time to emulate a write failure. + * @ubi: UBI device description object * * Returns non-zero if a write failure should be emulated, otherwise returns * zero. */ -static inline int ubi_dbg_is_write_failure(void) +static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) { - return !(random32() % 500); + if (ubi->dbg.emulate_io_failures) + return !(prandom_u32() % 500); + return 0; } -#else -#define ubi_dbg_is_write_failure() 0 -#endif -#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES /** * ubi_dbg_is_erase_failure - if its time to emulate an erase failure. + * @ubi: UBI device description object * * Returns non-zero if an erase failure should be emulated, otherwise returns * zero. */ -static inline int ubi_dbg_is_erase_failure(void) +static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) { - return !(random32() % 400); + if (ubi->dbg.emulate_io_failures) + return !(prandom_u32() % 400); + return 0; +} + +static inline int ubi_dbg_chk_io(const struct ubi_device *ubi) +{ + return ubi->dbg.chk_io; } -#else -#define ubi_dbg_is_erase_failure() 0 -#endif +static inline int ubi_dbg_chk_gen(const struct ubi_device *ubi) +{ + return ubi->dbg.chk_gen; +} #endif /* !__UBI_DEBUG_H__ */ diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 7d27eda..3c2a7e6 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -7,20 +7,20 @@ */ /* - * The UBI Eraseblock Association (EBA) unit. + * The UBI Eraseblock Association (EBA) sub-system. * - * This unit is responsible for I/O to/from logical eraseblock. + * This sub-system is responsible for I/O to/from logical eraseblock. * * Although in this implementation the EBA table is fully kept and managed in * RAM, which assumes poor scalability, it might be (partially) maintained on * flash in future implementations. * - * The EBA unit implements per-logical eraseblock locking. Before accessing a - * logical eraseblock it is locked for reading or writing. The per-logical - * eraseblock locking is implemented by means of the lock tree. The lock tree - * is an RB-tree which refers all the currently locked logical eraseblocks. The - * lock tree elements are &struct ubi_ltree_entry objects. They are indexed by - * (@vol_id, @lnum) pairs. + * The EBA sub-system implements per-logical eraseblock locking. Before + * accessing a logical eraseblock it is locked for reading or writing. The + * per-logical eraseblock locking is implemented by means of the lock tree. The + * lock tree is an RB-tree which refers all the currently locked logical + * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects. + * They are indexed by (@vol_id, @lnum) pairs. * * EBA also maintains the global sequence counter which is incremented each * time a logical eraseblock is mapped to a physical eraseblock and it is @@ -29,13 +29,15 @@ * 64 bits is enough to never overflow. */ -#ifdef UBI_LINUX +#define __UBOOT__ +#ifndef __UBOOT__ #include <linux/slab.h> #include <linux/crc32.h> -#include <linux/err.h> +#else +#include <ubi_uboot.h> #endif -#include <ubi_uboot.h> +#include <linux/err.h> #include "ubi.h" /* Number of physical eraseblocks reserved for atomic LEB change operation */ @@ -49,7 +51,7 @@ * global sequence counter value. It also increases the global sequence * counter. */ -static unsigned long long next_sqnum(struct ubi_device *ubi) +unsigned long long ubi_next_sqnum(struct ubi_device *ubi) { unsigned long long sqnum; @@ -181,9 +183,7 @@ static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi, le->users += 1; spin_unlock(&ubi->ltree_lock); - if (le_free) - kfree(le_free); - + kfree(le_free); return le; } @@ -215,22 +215,18 @@ static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) */ static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) { - int _free = 0; struct ubi_ltree_entry *le; spin_lock(&ubi->ltree_lock); le = ltree_lookup(ubi, vol_id, lnum); le->users -= 1; ubi_assert(le->users >= 0); + up_read(&le->mutex); if (le->users == 0) { rb_erase(&le->rb, &ubi->ltree); - _free = 1; + kfree(le); } spin_unlock(&ubi->ltree_lock); - - up_read(&le->mutex); - if (_free) - kfree(le); } /** @@ -266,7 +262,6 @@ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) */ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) { - int _free; struct ubi_ltree_entry *le; le = ltree_add_entry(ubi, vol_id, lnum); @@ -281,12 +276,9 @@ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) ubi_assert(le->users >= 0); if (le->users == 0) { rb_erase(&le->rb, &ubi->ltree); - _free = 1; - } else - _free = 0; - spin_unlock(&ubi->ltree_lock); - if (_free) kfree(le); + } + spin_unlock(&ubi->ltree_lock); return 1; } @@ -299,23 +291,18 @@ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) */ static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) { - int _free; struct ubi_ltree_entry *le; spin_lock(&ubi->ltree_lock); le = ltree_lookup(ubi, vol_id, lnum); le->users -= 1; ubi_assert(le->users >= 0); + up_write(&le->mutex); if (le->users == 0) { rb_erase(&le->rb, &ubi->ltree); - _free = 1; - } else - _free = 0; - spin_unlock(&ubi->ltree_lock); - - up_write(&le->mutex); - if (_free) kfree(le); + } + spin_unlock(&ubi->ltree_lock); } /** @@ -347,8 +334,10 @@ int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum); + down_read(&ubi->fm_sem); vol->eba_tbl[lnum] = UBI_LEB_UNMAPPED; - err = ubi_wl_put_peb(ubi, pnum, 0); + up_read(&ubi->fm_sem); + err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 0); out_unlock: leb_write_unlock(ubi, vol_id, lnum); @@ -425,9 +414,10 @@ retry: * may try to recover data. FIXME: but this is * not implemented. */ - if (err == UBI_IO_BAD_VID_HDR) { - ubi_warn("bad VID header at PEB %d, LEB" - "%d:%d", pnum, vol_id, lnum); + if (err == UBI_IO_BAD_HDR_EBADMSG || + err == UBI_IO_BAD_HDR) { + ubi_warn("corrupted VID header at PEB %d, LEB %d:%d", + pnum, vol_id, lnum); err = -EBADMSG; } else ubi_ro_mode(ubi); @@ -508,16 +498,12 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, struct ubi_vid_hdr *vid_hdr; vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); - if (!vid_hdr) { + if (!vid_hdr) return -ENOMEM; - } - - mutex_lock(&ubi->buf_mutex); retry: - new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN); + new_pnum = ubi_wl_get_peb(ubi); if (new_pnum < 0) { - mutex_unlock(&ubi->buf_mutex); ubi_free_vid_hdr(ubi, vid_hdr); return new_pnum; } @@ -531,39 +517,45 @@ retry: goto out_put; } - vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr); if (err) goto write_error; data_size = offset + len; - memset(ubi->peb_buf1 + offset, 0xFF, len); + mutex_lock(&ubi->buf_mutex); + memset(ubi->peb_buf + offset, 0xFF, len); /* Read everything before the area where the write failure happened */ if (offset > 0) { - err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset); + err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, offset); if (err && err != UBI_IO_BITFLIPS) - goto out_put; + goto out_unlock; } - memcpy(ubi->peb_buf1 + offset, buf, len); + memcpy(ubi->peb_buf + offset, buf, len); - err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size); - if (err) + err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size); + if (err) { + mutex_unlock(&ubi->buf_mutex); goto write_error; + } mutex_unlock(&ubi->buf_mutex); ubi_free_vid_hdr(ubi, vid_hdr); + down_read(&ubi->fm_sem); vol->eba_tbl[lnum] = new_pnum; - ubi_wl_put_peb(ubi, pnum, 1); + up_read(&ubi->fm_sem); + ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); ubi_msg("data was successfully recovered"); return 0; -out_put: +out_unlock: mutex_unlock(&ubi->buf_mutex); - ubi_wl_put_peb(ubi, new_pnum, 1); +out_put: + ubi_wl_put_peb(ubi, vol_id, lnum, new_pnum, 1); ubi_free_vid_hdr(ubi, vid_hdr); return err; @@ -573,9 +565,8 @@ write_error: * get another one. */ ubi_warn("failed to write to PEB %d", new_pnum); - ubi_wl_put_peb(ubi, new_pnum, 1); + ubi_wl_put_peb(ubi, vol_id, lnum, new_pnum, 1); if (++tries > UBI_IO_RETRIES) { - mutex_unlock(&ubi->buf_mutex); ubi_free_vid_hdr(ubi, vid_hdr); return err; } @@ -591,7 +582,6 @@ write_error: * @buf: the data to write * @offset: offset within the logical eraseblock where to write * @len: how many bytes to write - * @dtype: data type * * This function writes data to logical eraseblock @lnum of a dynamic volume * @vol. Returns zero in case of success and a negative error code in case @@ -599,7 +589,7 @@ write_error: * written to the flash media, but may be some garbage. */ int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, - const void *buf, int offset, int len, int dtype) + const void *buf, int offset, int len) { int err, pnum, tries = 0, vol_id = vol->vol_id; struct ubi_vid_hdr *vid_hdr; @@ -640,14 +630,14 @@ int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, } vid_hdr->vol_type = UBI_VID_DYNAMIC; - vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); vid_hdr->vol_id = cpu_to_be32(vol_id); vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); vid_hdr->data_pad = cpu_to_be32(vol->data_pad); retry: - pnum = ubi_wl_get_peb(ubi, dtype); + pnum = ubi_wl_get_peb(ubi); if (pnum < 0) { ubi_free_vid_hdr(ubi, vid_hdr); leb_write_unlock(ubi, vol_id, lnum); @@ -667,14 +657,15 @@ retry: if (len) { err = ubi_io_write_data(ubi, buf, pnum, offset, len); if (err) { - ubi_warn("failed to write %d bytes at offset %d of " - "LEB %d:%d, PEB %d", len, offset, vol_id, - lnum, pnum); + ubi_warn("failed to write %d bytes at offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); goto write_error; } } + down_read(&ubi->fm_sem); vol->eba_tbl[lnum] = pnum; + up_read(&ubi->fm_sem); leb_write_unlock(ubi, vol_id, lnum); ubi_free_vid_hdr(ubi, vid_hdr); @@ -693,7 +684,7 @@ write_error: * eraseblock, so just put it and request a new one. We assume that if * this physical eraseblock went bad, the erase code will handle that. */ - err = ubi_wl_put_peb(ubi, pnum, 1); + err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); if (err || ++tries > UBI_IO_RETRIES) { ubi_ro_mode(ubi); leb_write_unlock(ubi, vol_id, lnum); @@ -701,7 +692,7 @@ write_error: return err; } - vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); ubi_msg("try another PEB"); goto retry; } @@ -713,7 +704,6 @@ write_error: * @lnum: logical eraseblock number * @buf: data to write * @len: how many bytes to write - * @dtype: data type * @used_ebs: how many logical eraseblocks will this volume contain * * This function writes data to logical eraseblock @lnum of static volume @@ -725,13 +715,12 @@ write_error: * to the real data size, although the @buf buffer has to contain the * alignment. In all other cases, @len has to be aligned. * - * It is prohibited to write more then once to logical eraseblocks of static + * It is prohibited to write more than once to logical eraseblocks of static * volumes. This function returns zero in case of success and a negative error * code in case of failure. */ int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, - int lnum, const void *buf, int len, int dtype, - int used_ebs) + int lnum, const void *buf, int len, int used_ebs) { int err, pnum, tries = 0, data_size = len, vol_id = vol->vol_id; struct ubi_vid_hdr *vid_hdr; @@ -756,7 +745,7 @@ int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, return err; } - vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); vid_hdr->vol_id = cpu_to_be32(vol_id); vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); @@ -769,7 +758,7 @@ int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, vid_hdr->data_crc = cpu_to_be32(crc); retry: - pnum = ubi_wl_get_peb(ubi, dtype); + pnum = ubi_wl_get_peb(ubi); if (pnum < 0) { ubi_free_vid_hdr(ubi, vid_hdr); leb_write_unlock(ubi, vol_id, lnum); @@ -794,7 +783,9 @@ retry: } ubi_assert(vol->eba_tbl[lnum] < 0); + down_read(&ubi->fm_sem); vol->eba_tbl[lnum] = pnum; + up_read(&ubi->fm_sem); leb_write_unlock(ubi, vol_id, lnum); ubi_free_vid_hdr(ubi, vid_hdr); @@ -813,7 +804,7 @@ write_error: return err; } - err = ubi_wl_put_peb(ubi, pnum, 1); + err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); if (err || ++tries > UBI_IO_RETRIES) { ubi_ro_mode(ubi); leb_write_unlock(ubi, vol_id, lnum); @@ -821,7 +812,7 @@ write_error: return err; } - vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); ubi_msg("try another PEB"); goto retry; } @@ -833,7 +824,6 @@ write_error: * @lnum: logical eraseblock number * @buf: data to write * @len: how many bytes to write - * @dtype: data type * * This function changes the contents of a logical eraseblock atomically. @buf * has to contain new logical eraseblock data, and @len - the length of the @@ -845,7 +835,7 @@ write_error: * LEB change may be done at a time. This is ensured by @ubi->alc_mutex. */ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, - int lnum, const void *buf, int len, int dtype) + int lnum, const void *buf, int len) { int err, pnum, tries = 0, vol_id = vol->vol_id; struct ubi_vid_hdr *vid_hdr; @@ -862,7 +852,7 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, err = ubi_eba_unmap_leb(ubi, vol, lnum); if (err) return err; - return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0, dtype); + return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0); } vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); @@ -874,7 +864,7 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, if (err) goto out_mutex; - vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); vid_hdr->vol_id = cpu_to_be32(vol_id); vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); @@ -887,7 +877,7 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, vid_hdr->data_crc = cpu_to_be32(crc); retry: - pnum = ubi_wl_get_peb(ubi, dtype); + pnum = ubi_wl_get_peb(ubi); if (pnum < 0) { err = pnum; goto out_leb_unlock; @@ -911,12 +901,14 @@ retry: } if (vol->eba_tbl[lnum] >= 0) { - err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1); + err = ubi_wl_put_peb(ubi, vol_id, lnum, vol->eba_tbl[lnum], 0); if (err) goto out_leb_unlock; } + down_read(&ubi->fm_sem); vol->eba_tbl[lnum] = pnum; + up_read(&ubi->fm_sem); out_leb_unlock: leb_write_unlock(ubi, vol_id, lnum); @@ -936,18 +928,45 @@ write_error: goto out_leb_unlock; } - err = ubi_wl_put_peb(ubi, pnum, 1); + err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); if (err || ++tries > UBI_IO_RETRIES) { ubi_ro_mode(ubi); goto out_leb_unlock; } - vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); ubi_msg("try another PEB"); goto retry; } /** + * is_error_sane - check whether a read error is sane. + * @err: code of the error happened during reading + * + * This is a helper function for 'ubi_eba_copy_leb()' which is called when we + * cannot read data from the target PEB (an error @err happened). If the error + * code is sane, then we treat this error as non-fatal. Otherwise the error is + * fatal and UBI will be switched to R/O mode later. + * + * The idea is that we try not to switch to R/O mode if the read error is + * something which suggests there was a real read problem. E.g., %-EIO. Or a + * memory allocation failed (-%ENOMEM). Otherwise, it is safer to switch to R/O + * mode, simply because we do not know what happened at the MTD level, and we + * cannot handle this. E.g., the underlying driver may have become crazy, and + * it is safer to switch to R/O mode to preserve the data. + * + * And bear in mind, this is about reading from the target PEB, i.e. the PEB + * which we have just written. + */ +static int is_error_sane(int err) +{ + if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_HDR || + err == UBI_IO_BAD_HDR_EBADMSG || err == -ETIMEDOUT) + return 0; + return 1; +} + +/** * ubi_eba_copy_leb - copy logical eraseblock. * @ubi: UBI device description object * @from: physical eraseblock number from where to copy @@ -957,10 +976,9 @@ write_error: * This function copies logical eraseblock from physical eraseblock @from to * physical eraseblock @to. The @vid_hdr buffer may be changed by this * function. Returns: - * o %0 in case of success; - * o %1 if the operation was canceled and should be tried later (e.g., - * because a bit-flip was detected at the target PEB); - * o %2 if the volume is being deleted and this LEB should not be moved. + * o %0 in case of success; + * o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_TARGET_BITFLIPS, etc; + * o a negative error code in case of failure. */ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, struct ubi_vid_hdr *vid_hdr) @@ -972,7 +990,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, vol_id = be32_to_cpu(vid_hdr->vol_id); lnum = be32_to_cpu(vid_hdr->lnum); - dbg_eba("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to); + dbg_wl("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to); if (vid_hdr->vol_type == UBI_VID_STATIC) { data_size = be32_to_cpu(vid_hdr->data_size); @@ -986,17 +1004,16 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, /* * Note, we may race with volume deletion, which means that the volume * this logical eraseblock belongs to might be being deleted. Since the - * volume deletion unmaps all the volume's logical eraseblocks, it will + * volume deletion un-maps all the volume's logical eraseblocks, it will * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. */ vol = ubi->volumes[idx]; + spin_unlock(&ubi->volumes_lock); if (!vol) { /* No need to do further work, cancel */ - dbg_eba("volume %d is being removed, cancel", vol_id); - spin_unlock(&ubi->volumes_lock); - return 2; + dbg_wl("volume %d is being removed, cancel", vol_id); + return MOVE_CANCEL_RACE; } - spin_unlock(&ubi->volumes_lock); /* * We do not want anybody to write to this logical eraseblock while we @@ -1008,12 +1025,15 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, * (@from). This task locks the LEB and goes sleep in the * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are * holding @ubi->move_mutex and go sleep on the LEB lock. So, if the - * LEB is already locked, we just do not move it and return %1. + * LEB is already locked, we just do not move it and return + * %MOVE_RETRY. Note, we do not return %MOVE_CANCEL_RACE here because + * we do not know the reasons of the contention - it may be just a + * normal I/O on this LEB, so we want to re-try. */ err = leb_write_trylock(ubi, vol_id, lnum); if (err) { - dbg_eba("contention on LEB %d:%d, cancel", vol_id, lnum); - return err; + dbg_wl("contention on LEB %d:%d, cancel", vol_id, lnum); + return MOVE_RETRY; } /* @@ -1022,30 +1042,30 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, * cancel it. */ if (vol->eba_tbl[lnum] != from) { - dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to " - "PEB %d, cancel", vol_id, lnum, from, - vol->eba_tbl[lnum]); - err = 1; + dbg_wl("LEB %d:%d is no longer mapped to PEB %d, mapped to PEB %d, cancel", + vol_id, lnum, from, vol->eba_tbl[lnum]); + err = MOVE_CANCEL_RACE; goto out_unlock_leb; } /* - * OK, now the LEB is locked and we can safely start moving iy. Since - * this function utilizes thie @ubi->peb1_buf buffer which is shared - * with some other functions, so lock the buffer by taking the + * OK, now the LEB is locked and we can safely start moving it. Since + * this function utilizes the @ubi->peb_buf buffer which is shared + * with some other functions - we lock the buffer by taking the * @ubi->buf_mutex. */ mutex_lock(&ubi->buf_mutex); - dbg_eba("read %d bytes of data", aldata_size); - err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size); + dbg_wl("read %d bytes of data", aldata_size); + err = ubi_io_read_data(ubi, ubi->peb_buf, from, 0, aldata_size); if (err && err != UBI_IO_BITFLIPS) { ubi_warn("error %d while reading data from PEB %d", err, from); + err = MOVE_SOURCE_RD_ERR; goto out_unlock_buf; } /* - * Now we have got to calculate how much data we have to to copy. In + * Now we have got to calculate how much data we have to copy. In * case of a static volume it is fairly easy - the VID header contains * the data size. In case of a dynamic volume it is more difficult - we * have to read the contents, cut 0xFF bytes from the end and copy only @@ -1056,14 +1076,14 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, */ if (vid_hdr->vol_type == UBI_VID_DYNAMIC) aldata_size = data_size = - ubi_calc_data_len(ubi, ubi->peb_buf1, data_size); + ubi_calc_data_len(ubi, ubi->peb_buf, data_size); cond_resched(); - crc = crc32(UBI_CRC32_INIT, ubi->peb_buf1, data_size); + crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size); cond_resched(); /* - * It may turn out to me that the whole @from physical eraseblock + * It may turn out to be that the whole @from physical eraseblock * contains only 0xFF bytes. Then we have to only write the VID header * and do not write any data. This also means we should not set * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc. @@ -1073,28 +1093,37 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, vid_hdr->data_size = cpu_to_be32(data_size); vid_hdr->data_crc = cpu_to_be32(crc); } - vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); - if (err) + if (err) { + if (err == -EIO) + err = MOVE_TARGET_WR_ERR; goto out_unlock_buf; + } cond_resched(); /* Read the VID header back and check if it was written correctly */ err = ubi_io_read_vid_hdr(ubi, to, vid_hdr, 1); if (err) { - if (err != UBI_IO_BITFLIPS) - ubi_warn("cannot read VID header back from PEB %d", to); - else - err = 1; + if (err != UBI_IO_BITFLIPS) { + ubi_warn("error %d while reading VID header back from PEB %d", + err, to); + if (is_error_sane(err)) + err = MOVE_TARGET_RD_ERR; + } else + err = MOVE_TARGET_BITFLIPS; goto out_unlock_buf; } if (data_size > 0) { - err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); - if (err) + err = ubi_io_write_data(ubi, ubi->peb_buf, to, 0, aldata_size); + if (err) { + if (err == -EIO) + err = MOVE_TARGET_WR_ERR; goto out_unlock_buf; + } cond_resched(); @@ -1102,28 +1131,33 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, * We've written the data and are going to read it back to make * sure it was written correctly. */ - - err = ubi_io_read_data(ubi, ubi->peb_buf2, to, 0, aldata_size); + memset(ubi->peb_buf, 0xFF, aldata_size); + err = ubi_io_read_data(ubi, ubi->peb_buf, to, 0, aldata_size); if (err) { - if (err != UBI_IO_BITFLIPS) - ubi_warn("cannot read data back from PEB %d", - to); - else - err = 1; + if (err != UBI_IO_BITFLIPS) { + ubi_warn("error %d while reading data back from PEB %d", + err, to); + if (is_error_sane(err)) + err = MOVE_TARGET_RD_ERR; + } else + err = MOVE_TARGET_BITFLIPS; goto out_unlock_buf; } cond_resched(); - if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { - ubi_warn("read data back from PEB %d - it is different", + if (crc != crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size)) { + ubi_warn("read data back from PEB %d and it is different", to); + err = -EINVAL; goto out_unlock_buf; } } ubi_assert(vol->eba_tbl[lnum] == from); + down_read(&ubi->fm_sem); vol->eba_tbl[lnum] = to; + up_read(&ubi->fm_sem); out_unlock_buf: mutex_unlock(&ubi->buf_mutex); @@ -1133,28 +1167,165 @@ out_unlock_leb: } /** - * ubi_eba_init_scan - initialize the EBA unit using scanning information. + * print_rsvd_warning - warn about not having enough reserved PEBs. * @ubi: UBI device description object - * @si: scanning information + * + * This is a helper function for 'ubi_eba_init()' which is called when UBI + * cannot reserve enough PEBs for bad block handling. This function makes a + * decision whether we have to print a warning or not. The algorithm is as + * follows: + * o if this is a new UBI image, then just print the warning + * o if this is an UBI image which has already been used for some time, print + * a warning only if we can reserve less than 10% of the expected amount of + * the reserved PEB. + * + * The idea is that when UBI is used, PEBs become bad, and the reserved pool + * of PEBs becomes smaller, which is normal and we do not want to scare users + * with a warning every time they attach the MTD device. This was an issue + * reported by real users. + */ +static void print_rsvd_warning(struct ubi_device *ubi, + struct ubi_attach_info *ai) +{ + /* + * The 1 << 18 (256KiB) number is picked randomly, just a reasonably + * large number to distinguish between newly flashed and used images. + */ + if (ai->max_sqnum > (1 << 18)) { + int min = ubi->beb_rsvd_level / 10; + + if (!min) + min = 1; + if (ubi->beb_rsvd_pebs > min) + return; + } + + ubi_warn("cannot reserve enough PEBs for bad PEB handling, reserved %d, need %d", + ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); + if (ubi->corr_peb_count) + ubi_warn("%d PEBs are corrupted and not used", + ubi->corr_peb_count); +} + +/** + * self_check_eba - run a self check on the EBA table constructed by fastmap. + * @ubi: UBI device description object + * @ai_fastmap: UBI attach info object created by fastmap + * @ai_scan: UBI attach info object created by scanning + * + * Returns < 0 in case of an internal error, 0 otherwise. + * If a bad EBA table entry was found it will be printed out and + * ubi_assert() triggers. + */ +int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, + struct ubi_attach_info *ai_scan) +{ + int i, j, num_volumes, ret = 0; + int **scan_eba, **fm_eba; + struct ubi_ainf_volume *av; + struct ubi_volume *vol; + struct ubi_ainf_peb *aeb; + struct rb_node *rb; + + num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; + + scan_eba = kmalloc(sizeof(*scan_eba) * num_volumes, GFP_KERNEL); + if (!scan_eba) + return -ENOMEM; + + fm_eba = kmalloc(sizeof(*fm_eba) * num_volumes, GFP_KERNEL); + if (!fm_eba) { + kfree(scan_eba); + return -ENOMEM; + } + + for (i = 0; i < num_volumes; i++) { + vol = ubi->volumes[i]; + if (!vol) + continue; + + scan_eba[i] = kmalloc(vol->reserved_pebs * sizeof(**scan_eba), + GFP_KERNEL); + if (!scan_eba[i]) { + ret = -ENOMEM; + goto out_free; + } + + fm_eba[i] = kmalloc(vol->reserved_pebs * sizeof(**fm_eba), + GFP_KERNEL); + if (!fm_eba[i]) { + ret = -ENOMEM; + goto out_free; + } + + for (j = 0; j < vol->reserved_pebs; j++) + scan_eba[i][j] = fm_eba[i][j] = UBI_LEB_UNMAPPED; + + av = ubi_find_av(ai_scan, idx2vol_id(ubi, i)); + if (!av) + continue; + + ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) + scan_eba[i][aeb->lnum] = aeb->pnum; + + av = ubi_find_av(ai_fastmap, idx2vol_id(ubi, i)); + if (!av) + continue; + + ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) + fm_eba[i][aeb->lnum] = aeb->pnum; + + for (j = 0; j < vol->reserved_pebs; j++) { + if (scan_eba[i][j] != fm_eba[i][j]) { + if (scan_eba[i][j] == UBI_LEB_UNMAPPED || + fm_eba[i][j] == UBI_LEB_UNMAPPED) + continue; + + ubi_err("LEB:%i:%i is PEB:%i instead of %i!", + vol->vol_id, i, fm_eba[i][j], + scan_eba[i][j]); + ubi_assert(0); + } + } + } + +out_free: + for (i = 0; i < num_volumes; i++) { + if (!ubi->volumes[i]) + continue; + + kfree(scan_eba[i]); + kfree(fm_eba[i]); + } + + kfree(scan_eba); + kfree(fm_eba); + return ret; +} + +/** + * ubi_eba_init - initialize the EBA sub-system using attaching information. + * @ubi: UBI device description object + * @ai: attaching information * * This function returns zero in case of success and a negative error code in * case of failure. */ -int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) +int ubi_eba_init(struct ubi_device *ubi, struct ubi_attach_info *ai) { int i, j, err, num_volumes; - struct ubi_scan_volume *sv; + struct ubi_ainf_volume *av; struct ubi_volume *vol; - struct ubi_scan_leb *seb; + struct ubi_ainf_peb *aeb; struct rb_node *rb; - dbg_eba("initialize EBA unit"); + dbg_eba("initialize EBA sub-system"); spin_lock_init(&ubi->ltree_lock); mutex_init(&ubi->alc_mutex); ubi->ltree = RB_ROOT; - ubi->global_sqnum = si->max_sqnum + 1; + ubi->global_sqnum = ai->max_sqnum + 1; num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; for (i = 0; i < num_volumes; i++) { @@ -1174,24 +1345,27 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) for (j = 0; j < vol->reserved_pebs; j++) vol->eba_tbl[j] = UBI_LEB_UNMAPPED; - sv = ubi_scan_find_sv(si, idx2vol_id(ubi, i)); - if (!sv) + av = ubi_find_av(ai, idx2vol_id(ubi, i)); + if (!av) continue; - ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { - if (seb->lnum >= vol->reserved_pebs) + ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) { + if (aeb->lnum >= vol->reserved_pebs) /* * This may happen in case of an unclean reboot * during re-size. */ - ubi_scan_move_to_list(sv, seb, &si->erase); - vol->eba_tbl[seb->lnum] = seb->pnum; + ubi_move_aeb_to_list(av, aeb, &ai->erase); + vol->eba_tbl[aeb->lnum] = aeb->pnum; } } if (ubi->avail_pebs < EBA_RESERVED_PEBS) { ubi_err("no enough physical eraseblocks (%d, need %d)", ubi->avail_pebs, EBA_RESERVED_PEBS); + if (ubi->corr_peb_count) + ubi_err("%d PEBs are corrupted and not used", + ubi->corr_peb_count); err = -ENOSPC; goto out_free; } @@ -1204,9 +1378,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) if (ubi->avail_pebs < ubi->beb_rsvd_level) { /* No enough free physical eraseblocks */ ubi->beb_rsvd_pebs = ubi->avail_pebs; - ubi_warn("cannot reserve enough PEBs for bad PEB " - "handling, reserved %d, need %d", - ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); + print_rsvd_warning(ubi, ai); } else ubi->beb_rsvd_pebs = ubi->beb_rsvd_level; @@ -1214,7 +1386,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ubi->rsvd_pebs += ubi->beb_rsvd_pebs; } - dbg_eba("EBA unit is initialized"); + dbg_eba("EBA sub-system is initialized"); return 0; out_free: @@ -1222,23 +1394,7 @@ out_free: if (!ubi->volumes[i]) continue; kfree(ubi->volumes[i]->eba_tbl); + ubi->volumes[i]->eba_tbl = NULL; } return err; } - -/** - * ubi_eba_close - close EBA unit. - * @ubi: UBI device description object - */ -void ubi_eba_close(const struct ubi_device *ubi) -{ - int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; - - dbg_eba("close EBA unit"); - - for (i = 0; i < num_volumes; i++) { - if (!ubi->volumes[i]) - continue; - kfree(ubi->volumes[i]->eba_tbl); - } -} diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c new file mode 100644 index 0000000..787522f --- /dev/null +++ b/drivers/mtd/ubi/fastmap.c @@ -0,0 +1,1584 @@ +/* + * Copyright (c) 2012 Linutronix GmbH + * Author: Richard Weinberger <richard@nod.at> + * + * SPDX-License-Identifier: GPL-2.0+ + * + */ + +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/crc32.h> +#else +#include <div64.h> +#include <malloc.h> +#include <ubi_uboot.h> +#endif + +#include <linux/compat.h> +#include <linux/math64.h> +#include "ubi.h" + +/** + * ubi_calc_fm_size - calculates the fastmap size in bytes for an UBI device. + * @ubi: UBI device description object + */ +size_t ubi_calc_fm_size(struct ubi_device *ubi) +{ + size_t size; + + size = sizeof(struct ubi_fm_hdr) + \ + sizeof(struct ubi_fm_scan_pool) + \ + sizeof(struct ubi_fm_scan_pool) + \ + (ubi->peb_count * sizeof(struct ubi_fm_ec)) + \ + (sizeof(struct ubi_fm_eba) + \ + (ubi->peb_count * sizeof(__be32))) + \ + sizeof(struct ubi_fm_volhdr) * UBI_MAX_VOLUMES; + return roundup(size, ubi->leb_size); +} + + +/** + * new_fm_vhdr - allocate a new volume header for fastmap usage. + * @ubi: UBI device description object + * @vol_id: the VID of the new header + * + * Returns a new struct ubi_vid_hdr on success. + * NULL indicates out of memory. + */ +static struct ubi_vid_hdr *new_fm_vhdr(struct ubi_device *ubi, int vol_id) +{ + struct ubi_vid_hdr *new; + + new = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!new) + goto out; + + new->vol_type = UBI_VID_DYNAMIC; + new->vol_id = cpu_to_be32(vol_id); + + /* UBI implementations without fastmap support have to delete the + * fastmap. + */ + new->compat = UBI_COMPAT_DELETE; + +out: + return new; +} + +/** + * add_aeb - create and add a attach erase block to a given list. + * @ai: UBI attach info object + * @list: the target list + * @pnum: PEB number of the new attach erase block + * @ec: erease counter of the new LEB + * @scrub: scrub this PEB after attaching + * + * Returns 0 on success, < 0 indicates an internal error. + */ +static int add_aeb(struct ubi_attach_info *ai, struct list_head *list, + int pnum, int ec, int scrub) +{ + struct ubi_ainf_peb *aeb; + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + aeb->pnum = pnum; + aeb->ec = ec; + aeb->lnum = -1; + aeb->scrub = scrub; + aeb->copy_flag = aeb->sqnum = 0; + + ai->ec_sum += aeb->ec; + ai->ec_count++; + + if (ai->max_ec < aeb->ec) + ai->max_ec = aeb->ec; + + if (ai->min_ec > aeb->ec) + ai->min_ec = aeb->ec; + + list_add_tail(&aeb->u.list, list); + + return 0; +} + +/** + * add_vol - create and add a new volume to ubi_attach_info. + * @ai: ubi_attach_info object + * @vol_id: VID of the new volume + * @used_ebs: number of used EBS + * @data_pad: data padding value of the new volume + * @vol_type: volume type + * @last_eb_bytes: number of bytes in the last LEB + * + * Returns the new struct ubi_ainf_volume on success. + * NULL indicates an error. + */ +static struct ubi_ainf_volume *add_vol(struct ubi_attach_info *ai, int vol_id, + int used_ebs, int data_pad, u8 vol_type, + int last_eb_bytes) +{ + struct ubi_ainf_volume *av; + struct rb_node **p = &ai->volumes.rb_node, *parent = NULL; + + while (*p) { + parent = *p; + av = rb_entry(parent, struct ubi_ainf_volume, rb); + + if (vol_id > av->vol_id) + p = &(*p)->rb_left; + else if (vol_id > av->vol_id) + p = &(*p)->rb_right; + } + + av = kmalloc(sizeof(struct ubi_ainf_volume), GFP_KERNEL); + if (!av) + goto out; + + av->highest_lnum = av->leb_count = 0; + av->vol_id = vol_id; + av->used_ebs = used_ebs; + av->data_pad = data_pad; + av->last_data_size = last_eb_bytes; + av->compat = 0; + av->vol_type = vol_type; + av->root = RB_ROOT; + + dbg_bld("found volume (ID %i)", vol_id); + + rb_link_node(&av->rb, parent, p); + rb_insert_color(&av->rb, &ai->volumes); + +out: + return av; +} + +/** + * assign_aeb_to_av - assigns a SEB to a given ainf_volume and removes it + * from it's original list. + * @ai: ubi_attach_info object + * @aeb: the to be assigned SEB + * @av: target scan volume + */ +static void assign_aeb_to_av(struct ubi_attach_info *ai, + struct ubi_ainf_peb *aeb, + struct ubi_ainf_volume *av) +{ + struct ubi_ainf_peb *tmp_aeb; + struct rb_node **p = &ai->volumes.rb_node, *parent = NULL; + + p = &av->root.rb_node; + while (*p) { + parent = *p; + + tmp_aeb = rb_entry(parent, struct ubi_ainf_peb, u.rb); + if (aeb->lnum != tmp_aeb->lnum) { + if (aeb->lnum < tmp_aeb->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + + continue; + } else + break; + } + + list_del(&aeb->u.list); + av->leb_count++; + + rb_link_node(&aeb->u.rb, parent, p); + rb_insert_color(&aeb->u.rb, &av->root); +} + +/** + * update_vol - inserts or updates a LEB which was found a pool. + * @ubi: the UBI device object + * @ai: attach info object + * @av: the volume this LEB belongs to + * @new_vh: the volume header derived from new_aeb + * @new_aeb: the AEB to be examined + * + * Returns 0 on success, < 0 indicates an internal error. + */ +static int update_vol(struct ubi_device *ubi, struct ubi_attach_info *ai, + struct ubi_ainf_volume *av, struct ubi_vid_hdr *new_vh, + struct ubi_ainf_peb *new_aeb) +{ + struct rb_node **p = &av->root.rb_node, *parent = NULL; + struct ubi_ainf_peb *aeb, *victim; + int cmp_res; + + while (*p) { + parent = *p; + aeb = rb_entry(parent, struct ubi_ainf_peb, u.rb); + + if (be32_to_cpu(new_vh->lnum) != aeb->lnum) { + if (be32_to_cpu(new_vh->lnum) < aeb->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + + continue; + } + + /* This case can happen if the fastmap gets written + * because of a volume change (creation, deletion, ..). + * Then a PEB can be within the persistent EBA and the pool. + */ + if (aeb->pnum == new_aeb->pnum) { + ubi_assert(aeb->lnum == new_aeb->lnum); + kmem_cache_free(ai->aeb_slab_cache, new_aeb); + + return 0; + } + + cmp_res = ubi_compare_lebs(ubi, aeb, new_aeb->pnum, new_vh); + if (cmp_res < 0) + return cmp_res; + + /* new_aeb is newer */ + if (cmp_res & 1) { + victim = kmem_cache_alloc(ai->aeb_slab_cache, + GFP_KERNEL); + if (!victim) + return -ENOMEM; + + victim->ec = aeb->ec; + victim->pnum = aeb->pnum; + list_add_tail(&victim->u.list, &ai->erase); + + if (av->highest_lnum == be32_to_cpu(new_vh->lnum)) + av->last_data_size = \ + be32_to_cpu(new_vh->data_size); + + dbg_bld("vol %i: AEB %i's PEB %i is the newer", + av->vol_id, aeb->lnum, new_aeb->pnum); + + aeb->ec = new_aeb->ec; + aeb->pnum = new_aeb->pnum; + aeb->copy_flag = new_vh->copy_flag; + aeb->scrub = new_aeb->scrub; + kmem_cache_free(ai->aeb_slab_cache, new_aeb); + + /* new_aeb is older */ + } else { + dbg_bld("vol %i: AEB %i's PEB %i is old, dropping it", + av->vol_id, aeb->lnum, new_aeb->pnum); + list_add_tail(&new_aeb->u.list, &ai->erase); + } + + return 0; + } + /* This LEB is new, let's add it to the volume */ + + if (av->highest_lnum <= be32_to_cpu(new_vh->lnum)) { + av->highest_lnum = be32_to_cpu(new_vh->lnum); + av->last_data_size = be32_to_cpu(new_vh->data_size); + } + + if (av->vol_type == UBI_STATIC_VOLUME) + av->used_ebs = be32_to_cpu(new_vh->used_ebs); + + av->leb_count++; + + rb_link_node(&new_aeb->u.rb, parent, p); + rb_insert_color(&new_aeb->u.rb, &av->root); + + return 0; +} + +/** + * process_pool_aeb - we found a non-empty PEB in a pool. + * @ubi: UBI device object + * @ai: attach info object + * @new_vh: the volume header derived from new_aeb + * @new_aeb: the AEB to be examined + * + * Returns 0 on success, < 0 indicates an internal error. + */ +static int process_pool_aeb(struct ubi_device *ubi, struct ubi_attach_info *ai, + struct ubi_vid_hdr *new_vh, + struct ubi_ainf_peb *new_aeb) +{ + struct ubi_ainf_volume *av, *tmp_av = NULL; + struct rb_node **p = &ai->volumes.rb_node, *parent = NULL; + int found = 0; + + if (be32_to_cpu(new_vh->vol_id) == UBI_FM_SB_VOLUME_ID || + be32_to_cpu(new_vh->vol_id) == UBI_FM_DATA_VOLUME_ID) { + kmem_cache_free(ai->aeb_slab_cache, new_aeb); + + return 0; + } + + /* Find the volume this SEB belongs to */ + while (*p) { + parent = *p; + tmp_av = rb_entry(parent, struct ubi_ainf_volume, rb); + + if (be32_to_cpu(new_vh->vol_id) > tmp_av->vol_id) + p = &(*p)->rb_left; + else if (be32_to_cpu(new_vh->vol_id) < tmp_av->vol_id) + p = &(*p)->rb_right; + else { + found = 1; + break; + } + } + + if (found) + av = tmp_av; + else { + ubi_err("orphaned volume in fastmap pool!"); + return UBI_BAD_FASTMAP; + } + + ubi_assert(be32_to_cpu(new_vh->vol_id) == av->vol_id); + + return update_vol(ubi, ai, av, new_vh, new_aeb); +} + +/** + * unmap_peb - unmap a PEB. + * If fastmap detects a free PEB in the pool it has to check whether + * this PEB has been unmapped after writing the fastmap. + * + * @ai: UBI attach info object + * @pnum: The PEB to be unmapped + */ +static void unmap_peb(struct ubi_attach_info *ai, int pnum) +{ + struct ubi_ainf_volume *av; + struct rb_node *node, *node2; + struct ubi_ainf_peb *aeb; + + for (node = rb_first(&ai->volumes); node; node = rb_next(node)) { + av = rb_entry(node, struct ubi_ainf_volume, rb); + + for (node2 = rb_first(&av->root); node2; + node2 = rb_next(node2)) { + aeb = rb_entry(node2, struct ubi_ainf_peb, u.rb); + if (aeb->pnum == pnum) { + rb_erase(&aeb->u.rb, &av->root); + kmem_cache_free(ai->aeb_slab_cache, aeb); + return; + } + } + } +} + +/** + * scan_pool - scans a pool for changed (no longer empty PEBs). + * @ubi: UBI device object + * @ai: attach info object + * @pebs: an array of all PEB numbers in the to be scanned pool + * @pool_size: size of the pool (number of entries in @pebs) + * @max_sqnum: pointer to the maximal sequence number + * @eba_orphans: list of PEBs which need to be scanned + * @free: list of PEBs which are most likely free (and go into @ai->free) + * + * Returns 0 on success, if the pool is unusable UBI_BAD_FASTMAP is returned. + * < 0 indicates an internal error. + */ +#ifndef __UBOOT__ +static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai, + int *pebs, int pool_size, unsigned long long *max_sqnum, + struct list_head *eba_orphans, struct list_head *freef) +#else +static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai, + __be32 *pebs, int pool_size, unsigned long long *max_sqnum, + struct list_head *eba_orphans, struct list_head *freef) +#endif +{ + struct ubi_vid_hdr *vh; + struct ubi_ec_hdr *ech; + struct ubi_ainf_peb *new_aeb, *tmp_aeb; + int i, pnum, err, found_orphan, ret = 0; + + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) + return -ENOMEM; + + vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vh) { + kfree(ech); + return -ENOMEM; + } + + dbg_bld("scanning fastmap pool: size = %i", pool_size); + + /* + * Now scan all PEBs in the pool to find changes which have been made + * after the creation of the fastmap + */ + for (i = 0; i < pool_size; i++) { + int scrub = 0; + int image_seq; + + pnum = be32_to_cpu(pebs[i]); + + if (ubi_io_is_bad(ubi, pnum)) { + ubi_err("bad PEB in fastmap pool!"); + ret = UBI_BAD_FASTMAP; + goto out; + } + + err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); + if (err && err != UBI_IO_BITFLIPS) { + ubi_err("unable to read EC header! PEB:%i err:%i", + pnum, err); + ret = err > 0 ? UBI_BAD_FASTMAP : err; + goto out; + } else if (ret == UBI_IO_BITFLIPS) + scrub = 1; + + /* + * Older UBI implementations have image_seq set to zero, so + * we shouldn't fail if image_seq == 0. + */ + image_seq = be32_to_cpu(ech->image_seq); + + if (image_seq && (image_seq != ubi->image_seq)) { + ubi_err("bad image seq: 0x%x, expected: 0x%x", + be32_to_cpu(ech->image_seq), ubi->image_seq); + ret = UBI_BAD_FASTMAP; + goto out; + } + + err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0); + if (err == UBI_IO_FF || err == UBI_IO_FF_BITFLIPS) { + unsigned long long ec = be64_to_cpu(ech->ec); + unmap_peb(ai, pnum); + dbg_bld("Adding PEB to free: %i", pnum); + if (err == UBI_IO_FF_BITFLIPS) + add_aeb(ai, freef, pnum, ec, 1); + else + add_aeb(ai, freef, pnum, ec, 0); + continue; + } else if (err == 0 || err == UBI_IO_BITFLIPS) { + dbg_bld("Found non empty PEB:%i in pool", pnum); + + if (err == UBI_IO_BITFLIPS) + scrub = 1; + + found_orphan = 0; + list_for_each_entry(tmp_aeb, eba_orphans, u.list) { + if (tmp_aeb->pnum == pnum) { + found_orphan = 1; + break; + } + } + if (found_orphan) { + list_del(&tmp_aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); + } + + new_aeb = kmem_cache_alloc(ai->aeb_slab_cache, + GFP_KERNEL); + if (!new_aeb) { + ret = -ENOMEM; + goto out; + } + + new_aeb->ec = be64_to_cpu(ech->ec); + new_aeb->pnum = pnum; + new_aeb->lnum = be32_to_cpu(vh->lnum); + new_aeb->sqnum = be64_to_cpu(vh->sqnum); + new_aeb->copy_flag = vh->copy_flag; + new_aeb->scrub = scrub; + + if (*max_sqnum < new_aeb->sqnum) + *max_sqnum = new_aeb->sqnum; + + err = process_pool_aeb(ubi, ai, vh, new_aeb); + if (err) { + ret = err > 0 ? UBI_BAD_FASTMAP : err; + goto out; + } + } else { + /* We are paranoid and fall back to scanning mode */ + ubi_err("fastmap pool PEBs contains damaged PEBs!"); + ret = err > 0 ? UBI_BAD_FASTMAP : err; + goto out; + } + + } + +out: + ubi_free_vid_hdr(ubi, vh); + kfree(ech); + return ret; +} + +/** + * count_fastmap_pebs - Counts the PEBs found by fastmap. + * @ai: The UBI attach info object + */ +static int count_fastmap_pebs(struct ubi_attach_info *ai) +{ + struct ubi_ainf_peb *aeb; + struct ubi_ainf_volume *av; + struct rb_node *rb1, *rb2; + int n = 0; + + list_for_each_entry(aeb, &ai->erase, u.list) + n++; + + list_for_each_entry(aeb, &ai->free, u.list) + n++; + + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) + n++; + + return n; +} + +/** + * ubi_attach_fastmap - creates ubi_attach_info from a fastmap. + * @ubi: UBI device object + * @ai: UBI attach info object + * @fm: the fastmap to be attached + * + * Returns 0 on success, UBI_BAD_FASTMAP if the found fastmap was unusable. + * < 0 indicates an internal error. + */ +static int ubi_attach_fastmap(struct ubi_device *ubi, + struct ubi_attach_info *ai, + struct ubi_fastmap_layout *fm) +{ + struct list_head used, eba_orphans, freef; + struct ubi_ainf_volume *av; + struct ubi_ainf_peb *aeb, *tmp_aeb, *_tmp_aeb; + struct ubi_ec_hdr *ech; + struct ubi_fm_sb *fmsb; + struct ubi_fm_hdr *fmhdr; + struct ubi_fm_scan_pool *fmpl1, *fmpl2; + struct ubi_fm_ec *fmec; + struct ubi_fm_volhdr *fmvhdr; + struct ubi_fm_eba *fm_eba; + int ret, i, j, pool_size, wl_pool_size; + size_t fm_pos = 0, fm_size = ubi->fm_size; + unsigned long long max_sqnum = 0; + void *fm_raw = ubi->fm_buf; + + INIT_LIST_HEAD(&used); + INIT_LIST_HEAD(&freef); + INIT_LIST_HEAD(&eba_orphans); + INIT_LIST_HEAD(&ai->corr); + INIT_LIST_HEAD(&ai->free); + INIT_LIST_HEAD(&ai->erase); + INIT_LIST_HEAD(&ai->alien); + ai->volumes = RB_ROOT; + ai->min_ec = UBI_MAX_ERASECOUNTER; + + ai->aeb_slab_cache = kmem_cache_create("ubi_ainf_peb_slab", + sizeof(struct ubi_ainf_peb), + 0, 0, NULL); + if (!ai->aeb_slab_cache) { + ret = -ENOMEM; + goto fail; + } + + fmsb = (struct ubi_fm_sb *)(fm_raw); + ai->max_sqnum = fmsb->sqnum; + fm_pos += sizeof(struct ubi_fm_sb); + if (fm_pos >= fm_size) + goto fail_bad; + + fmhdr = (struct ubi_fm_hdr *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmhdr); + if (fm_pos >= fm_size) + goto fail_bad; + + if (be32_to_cpu(fmhdr->magic) != UBI_FM_HDR_MAGIC) { + ubi_err("bad fastmap header magic: 0x%x, expected: 0x%x", + be32_to_cpu(fmhdr->magic), UBI_FM_HDR_MAGIC); + goto fail_bad; + } + + fmpl1 = (struct ubi_fm_scan_pool *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmpl1); + if (fm_pos >= fm_size) + goto fail_bad; + if (be32_to_cpu(fmpl1->magic) != UBI_FM_POOL_MAGIC) { + ubi_err("bad fastmap pool magic: 0x%x, expected: 0x%x", + be32_to_cpu(fmpl1->magic), UBI_FM_POOL_MAGIC); + goto fail_bad; + } + + fmpl2 = (struct ubi_fm_scan_pool *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmpl2); + if (fm_pos >= fm_size) + goto fail_bad; + if (be32_to_cpu(fmpl2->magic) != UBI_FM_POOL_MAGIC) { + ubi_err("bad fastmap pool magic: 0x%x, expected: 0x%x", + be32_to_cpu(fmpl2->magic), UBI_FM_POOL_MAGIC); + goto fail_bad; + } + + pool_size = be16_to_cpu(fmpl1->size); + wl_pool_size = be16_to_cpu(fmpl2->size); + fm->max_pool_size = be16_to_cpu(fmpl1->max_size); + fm->max_wl_pool_size = be16_to_cpu(fmpl2->max_size); + + if (pool_size > UBI_FM_MAX_POOL_SIZE || pool_size < 0) { + ubi_err("bad pool size: %i", pool_size); + goto fail_bad; + } + + if (wl_pool_size > UBI_FM_MAX_POOL_SIZE || wl_pool_size < 0) { + ubi_err("bad WL pool size: %i", wl_pool_size); + goto fail_bad; + } + + + if (fm->max_pool_size > UBI_FM_MAX_POOL_SIZE || + fm->max_pool_size < 0) { + ubi_err("bad maximal pool size: %i", fm->max_pool_size); + goto fail_bad; + } + + if (fm->max_wl_pool_size > UBI_FM_MAX_POOL_SIZE || + fm->max_wl_pool_size < 0) { + ubi_err("bad maximal WL pool size: %i", fm->max_wl_pool_size); + goto fail_bad; + } + + /* read EC values from free list */ + for (i = 0; i < be32_to_cpu(fmhdr->free_peb_count); i++) { + fmec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmec); + if (fm_pos >= fm_size) + goto fail_bad; + + add_aeb(ai, &ai->free, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 0); + } + + /* read EC values from used list */ + for (i = 0; i < be32_to_cpu(fmhdr->used_peb_count); i++) { + fmec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmec); + if (fm_pos >= fm_size) + goto fail_bad; + + add_aeb(ai, &used, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 0); + } + + /* read EC values from scrub list */ + for (i = 0; i < be32_to_cpu(fmhdr->scrub_peb_count); i++) { + fmec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmec); + if (fm_pos >= fm_size) + goto fail_bad; + + add_aeb(ai, &used, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 1); + } + + /* read EC values from erase list */ + for (i = 0; i < be32_to_cpu(fmhdr->erase_peb_count); i++) { + fmec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmec); + if (fm_pos >= fm_size) + goto fail_bad; + + add_aeb(ai, &ai->erase, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 1); + } + + ai->mean_ec = div_u64(ai->ec_sum, ai->ec_count); + ai->bad_peb_count = be32_to_cpu(fmhdr->bad_peb_count); + + /* Iterate over all volumes and read their EBA table */ + for (i = 0; i < be32_to_cpu(fmhdr->vol_count); i++) { + fmvhdr = (struct ubi_fm_volhdr *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmvhdr); + if (fm_pos >= fm_size) + goto fail_bad; + + if (be32_to_cpu(fmvhdr->magic) != UBI_FM_VHDR_MAGIC) { + ubi_err("bad fastmap vol header magic: 0x%x, " \ + "expected: 0x%x", + be32_to_cpu(fmvhdr->magic), UBI_FM_VHDR_MAGIC); + goto fail_bad; + } + + av = add_vol(ai, be32_to_cpu(fmvhdr->vol_id), + be32_to_cpu(fmvhdr->used_ebs), + be32_to_cpu(fmvhdr->data_pad), + fmvhdr->vol_type, + be32_to_cpu(fmvhdr->last_eb_bytes)); + + if (!av) + goto fail_bad; + + ai->vols_found++; + if (ai->highest_vol_id < be32_to_cpu(fmvhdr->vol_id)) + ai->highest_vol_id = be32_to_cpu(fmvhdr->vol_id); + + fm_eba = (struct ubi_fm_eba *)(fm_raw + fm_pos); + fm_pos += sizeof(*fm_eba); + fm_pos += (sizeof(__be32) * be32_to_cpu(fm_eba->reserved_pebs)); + if (fm_pos >= fm_size) + goto fail_bad; + + if (be32_to_cpu(fm_eba->magic) != UBI_FM_EBA_MAGIC) { + ubi_err("bad fastmap EBA header magic: 0x%x, " \ + "expected: 0x%x", + be32_to_cpu(fm_eba->magic), UBI_FM_EBA_MAGIC); + goto fail_bad; + } + + for (j = 0; j < be32_to_cpu(fm_eba->reserved_pebs); j++) { + int pnum = be32_to_cpu(fm_eba->pnum[j]); + + if ((int)be32_to_cpu(fm_eba->pnum[j]) < 0) + continue; + + aeb = NULL; + list_for_each_entry(tmp_aeb, &used, u.list) { + if (tmp_aeb->pnum == pnum) { + aeb = tmp_aeb; + break; + } + } + + /* This can happen if a PEB is already in an EBA known + * by this fastmap but the PEB itself is not in the used + * list. + * In this case the PEB can be within the fastmap pool + * or while writing the fastmap it was in the protection + * queue. + */ + if (!aeb) { + aeb = kmem_cache_alloc(ai->aeb_slab_cache, + GFP_KERNEL); + if (!aeb) { + ret = -ENOMEM; + + goto fail; + } + + aeb->lnum = j; + aeb->pnum = be32_to_cpu(fm_eba->pnum[j]); + aeb->ec = -1; + aeb->scrub = aeb->copy_flag = aeb->sqnum = 0; + list_add_tail(&aeb->u.list, &eba_orphans); + continue; + } + + aeb->lnum = j; + + if (av->highest_lnum <= aeb->lnum) + av->highest_lnum = aeb->lnum; + + assign_aeb_to_av(ai, aeb, av); + + dbg_bld("inserting PEB:%i (LEB %i) to vol %i", + aeb->pnum, aeb->lnum, av->vol_id); + } + + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) { + ret = -ENOMEM; + goto fail; + } + + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &eba_orphans, + u.list) { + int err; + + if (ubi_io_is_bad(ubi, tmp_aeb->pnum)) { + ubi_err("bad PEB in fastmap EBA orphan list"); + ret = UBI_BAD_FASTMAP; + kfree(ech); + goto fail; + } + + err = ubi_io_read_ec_hdr(ubi, tmp_aeb->pnum, ech, 0); + if (err && err != UBI_IO_BITFLIPS) { + ubi_err("unable to read EC header! PEB:%i " \ + "err:%i", tmp_aeb->pnum, err); + ret = err > 0 ? UBI_BAD_FASTMAP : err; + kfree(ech); + + goto fail; + } else if (err == UBI_IO_BITFLIPS) + tmp_aeb->scrub = 1; + + tmp_aeb->ec = be64_to_cpu(ech->ec); + assign_aeb_to_av(ai, tmp_aeb, av); + } + + kfree(ech); + } + + ret = scan_pool(ubi, ai, fmpl1->pebs, pool_size, &max_sqnum, + &eba_orphans, &freef); + if (ret) + goto fail; + + ret = scan_pool(ubi, ai, fmpl2->pebs, wl_pool_size, &max_sqnum, + &eba_orphans, &freef); + if (ret) + goto fail; + + if (max_sqnum > ai->max_sqnum) + ai->max_sqnum = max_sqnum; + + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &freef, u.list) + list_move_tail(&tmp_aeb->u.list, &ai->free); + + ubi_assert(list_empty(&used)); + ubi_assert(list_empty(&eba_orphans)); + ubi_assert(list_empty(&freef)); + + /* + * If fastmap is leaking PEBs (must not happen), raise a + * fat warning and fall back to scanning mode. + * We do this here because in ubi_wl_init() it's too late + * and we cannot fall back to scanning. + */ +#ifndef __UBOOT__ + if (WARN_ON(count_fastmap_pebs(ai) != ubi->peb_count - + ai->bad_peb_count - fm->used_blocks)) + goto fail_bad; +#else + if (count_fastmap_pebs(ai) != ubi->peb_count - + ai->bad_peb_count - fm->used_blocks) { + WARN_ON(1); + goto fail_bad; + } +#endif + + return 0; + +fail_bad: + ret = UBI_BAD_FASTMAP; +fail: + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &used, u.list) { + list_del(&tmp_aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); + } + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &eba_orphans, u.list) { + list_del(&tmp_aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); + } + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &freef, u.list) { + list_del(&tmp_aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); + } + + return ret; +} + +/** + * ubi_scan_fastmap - scan the fastmap. + * @ubi: UBI device object + * @ai: UBI attach info to be filled + * @fm_anchor: The fastmap starts at this PEB + * + * Returns 0 on success, UBI_NO_FASTMAP if no fastmap was found, + * UBI_BAD_FASTMAP if one was found but is not usable. + * < 0 indicates an internal error. + */ +int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, + int fm_anchor) +{ + struct ubi_fm_sb *fmsb, *fmsb2; + struct ubi_vid_hdr *vh; + struct ubi_ec_hdr *ech; + struct ubi_fastmap_layout *fm; + int i, used_blocks, pnum, ret = 0; + size_t fm_size; + __be32 crc, tmp_crc; + unsigned long long sqnum = 0; + + mutex_lock(&ubi->fm_mutex); + memset(ubi->fm_buf, 0, ubi->fm_size); + + fmsb = kmalloc(sizeof(*fmsb), GFP_KERNEL); + if (!fmsb) { + ret = -ENOMEM; + goto out; + } + + fm = kzalloc(sizeof(*fm), GFP_KERNEL); + if (!fm) { + ret = -ENOMEM; + kfree(fmsb); + goto out; + } + + ret = ubi_io_read(ubi, fmsb, fm_anchor, ubi->leb_start, sizeof(*fmsb)); + if (ret && ret != UBI_IO_BITFLIPS) + goto free_fm_sb; + else if (ret == UBI_IO_BITFLIPS) + fm->to_be_tortured[0] = 1; + + if (be32_to_cpu(fmsb->magic) != UBI_FM_SB_MAGIC) { + ubi_err("bad super block magic: 0x%x, expected: 0x%x", + be32_to_cpu(fmsb->magic), UBI_FM_SB_MAGIC); + ret = UBI_BAD_FASTMAP; + goto free_fm_sb; + } + + if (fmsb->version != UBI_FM_FMT_VERSION) { + ubi_err("bad fastmap version: %i, expected: %i", + fmsb->version, UBI_FM_FMT_VERSION); + ret = UBI_BAD_FASTMAP; + goto free_fm_sb; + } + + used_blocks = be32_to_cpu(fmsb->used_blocks); + if (used_blocks > UBI_FM_MAX_BLOCKS || used_blocks < 1) { + ubi_err("number of fastmap blocks is invalid: %i", used_blocks); + ret = UBI_BAD_FASTMAP; + goto free_fm_sb; + } + + fm_size = ubi->leb_size * used_blocks; + if (fm_size != ubi->fm_size) { + ubi_err("bad fastmap size: %zi, expected: %zi", fm_size, + ubi->fm_size); + ret = UBI_BAD_FASTMAP; + goto free_fm_sb; + } + + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) { + ret = -ENOMEM; + goto free_fm_sb; + } + + vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vh) { + ret = -ENOMEM; + goto free_hdr; + } + + for (i = 0; i < used_blocks; i++) { + int image_seq; + + pnum = be32_to_cpu(fmsb->block_loc[i]); + + if (ubi_io_is_bad(ubi, pnum)) { + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + + ret = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); + if (ret && ret != UBI_IO_BITFLIPS) { + ubi_err("unable to read fastmap block# %i EC (PEB: %i)", + i, pnum); + if (ret > 0) + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } else if (ret == UBI_IO_BITFLIPS) + fm->to_be_tortured[i] = 1; + + image_seq = be32_to_cpu(ech->image_seq); + if (!ubi->image_seq) + ubi->image_seq = image_seq; + + /* + * Older UBI implementations have image_seq set to zero, so + * we shouldn't fail if image_seq == 0. + */ + if (image_seq && (image_seq != ubi->image_seq)) { + ubi_err("wrong image seq:%d instead of %d", + be32_to_cpu(ech->image_seq), ubi->image_seq); + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + + ret = ubi_io_read_vid_hdr(ubi, pnum, vh, 0); + if (ret && ret != UBI_IO_BITFLIPS) { + ubi_err("unable to read fastmap block# %i (PEB: %i)", + i, pnum); + goto free_hdr; + } + + if (i == 0) { + if (be32_to_cpu(vh->vol_id) != UBI_FM_SB_VOLUME_ID) { + ubi_err("bad fastmap anchor vol_id: 0x%x," \ + " expected: 0x%x", + be32_to_cpu(vh->vol_id), + UBI_FM_SB_VOLUME_ID); + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + } else { + if (be32_to_cpu(vh->vol_id) != UBI_FM_DATA_VOLUME_ID) { + ubi_err("bad fastmap data vol_id: 0x%x," \ + " expected: 0x%x", + be32_to_cpu(vh->vol_id), + UBI_FM_DATA_VOLUME_ID); + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + } + + if (sqnum < be64_to_cpu(vh->sqnum)) + sqnum = be64_to_cpu(vh->sqnum); + + ret = ubi_io_read(ubi, ubi->fm_buf + (ubi->leb_size * i), pnum, + ubi->leb_start, ubi->leb_size); + if (ret && ret != UBI_IO_BITFLIPS) { + ubi_err("unable to read fastmap block# %i (PEB: %i, " \ + "err: %i)", i, pnum, ret); + goto free_hdr; + } + } + + kfree(fmsb); + fmsb = NULL; + + fmsb2 = (struct ubi_fm_sb *)(ubi->fm_buf); + tmp_crc = be32_to_cpu(fmsb2->data_crc); + fmsb2->data_crc = 0; + crc = crc32(UBI_CRC32_INIT, ubi->fm_buf, fm_size); + if (crc != tmp_crc) { + ubi_err("fastmap data CRC is invalid"); + ubi_err("CRC should be: 0x%x, calc: 0x%x", tmp_crc, crc); + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + + fmsb2->sqnum = sqnum; + + fm->used_blocks = used_blocks; + + ret = ubi_attach_fastmap(ubi, ai, fm); + if (ret) { + if (ret > 0) + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + + for (i = 0; i < used_blocks; i++) { + struct ubi_wl_entry *e; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) { + while (i--) + kfree(fm->e[i]); + + ret = -ENOMEM; + goto free_hdr; + } + + e->pnum = be32_to_cpu(fmsb2->block_loc[i]); + e->ec = be32_to_cpu(fmsb2->block_ec[i]); + fm->e[i] = e; + } + + ubi->fm = fm; + ubi->fm_pool.max_size = ubi->fm->max_pool_size; + ubi->fm_wl_pool.max_size = ubi->fm->max_wl_pool_size; + ubi_msg("attached by fastmap"); + ubi_msg("fastmap pool size: %d", ubi->fm_pool.max_size); + ubi_msg("fastmap WL pool size: %d", ubi->fm_wl_pool.max_size); + ubi->fm_disabled = 0; + + ubi_free_vid_hdr(ubi, vh); + kfree(ech); +out: + mutex_unlock(&ubi->fm_mutex); + if (ret == UBI_BAD_FASTMAP) + ubi_err("Attach by fastmap failed, doing a full scan!"); + return ret; + +free_hdr: + ubi_free_vid_hdr(ubi, vh); + kfree(ech); +free_fm_sb: + kfree(fmsb); + kfree(fm); + goto out; +} + +/** + * ubi_write_fastmap - writes a fastmap. + * @ubi: UBI device object + * @new_fm: the to be written fastmap + * + * Returns 0 on success, < 0 indicates an internal error. + */ +static int ubi_write_fastmap(struct ubi_device *ubi, + struct ubi_fastmap_layout *new_fm) +{ + size_t fm_pos = 0; + void *fm_raw; + struct ubi_fm_sb *fmsb; + struct ubi_fm_hdr *fmh; + struct ubi_fm_scan_pool *fmpl1, *fmpl2; + struct ubi_fm_ec *fec; + struct ubi_fm_volhdr *fvh; + struct ubi_fm_eba *feba; + struct rb_node *node; + struct ubi_wl_entry *wl_e; + struct ubi_volume *vol; + struct ubi_vid_hdr *avhdr, *dvhdr; + struct ubi_work *ubi_wrk; + int ret, i, j, free_peb_count, used_peb_count, vol_count; + int scrub_peb_count, erase_peb_count; + + fm_raw = ubi->fm_buf; + memset(ubi->fm_buf, 0, ubi->fm_size); + + avhdr = new_fm_vhdr(ubi, UBI_FM_SB_VOLUME_ID); + if (!avhdr) { + ret = -ENOMEM; + goto out; + } + + dvhdr = new_fm_vhdr(ubi, UBI_FM_DATA_VOLUME_ID); + if (!dvhdr) { + ret = -ENOMEM; + goto out_kfree; + } + + spin_lock(&ubi->volumes_lock); + spin_lock(&ubi->wl_lock); + + fmsb = (struct ubi_fm_sb *)fm_raw; + fm_pos += sizeof(*fmsb); + ubi_assert(fm_pos <= ubi->fm_size); + + fmh = (struct ubi_fm_hdr *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmh); + ubi_assert(fm_pos <= ubi->fm_size); + + fmsb->magic = cpu_to_be32(UBI_FM_SB_MAGIC); + fmsb->version = UBI_FM_FMT_VERSION; + fmsb->used_blocks = cpu_to_be32(new_fm->used_blocks); + /* the max sqnum will be filled in while *reading* the fastmap */ + fmsb->sqnum = 0; + + fmh->magic = cpu_to_be32(UBI_FM_HDR_MAGIC); + free_peb_count = 0; + used_peb_count = 0; + scrub_peb_count = 0; + erase_peb_count = 0; + vol_count = 0; + + fmpl1 = (struct ubi_fm_scan_pool *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmpl1); + fmpl1->magic = cpu_to_be32(UBI_FM_POOL_MAGIC); + fmpl1->size = cpu_to_be16(ubi->fm_pool.size); + fmpl1->max_size = cpu_to_be16(ubi->fm_pool.max_size); + + for (i = 0; i < ubi->fm_pool.size; i++) + fmpl1->pebs[i] = cpu_to_be32(ubi->fm_pool.pebs[i]); + + fmpl2 = (struct ubi_fm_scan_pool *)(fm_raw + fm_pos); + fm_pos += sizeof(*fmpl2); + fmpl2->magic = cpu_to_be32(UBI_FM_POOL_MAGIC); + fmpl2->size = cpu_to_be16(ubi->fm_wl_pool.size); + fmpl2->max_size = cpu_to_be16(ubi->fm_wl_pool.max_size); + + for (i = 0; i < ubi->fm_wl_pool.size; i++) + fmpl2->pebs[i] = cpu_to_be32(ubi->fm_wl_pool.pebs[i]); + + for (node = rb_first(&ubi->free); node; node = rb_next(node)) { + wl_e = rb_entry(node, struct ubi_wl_entry, u.rb); + fec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + + fec->pnum = cpu_to_be32(wl_e->pnum); + fec->ec = cpu_to_be32(wl_e->ec); + + free_peb_count++; + fm_pos += sizeof(*fec); + ubi_assert(fm_pos <= ubi->fm_size); + } + fmh->free_peb_count = cpu_to_be32(free_peb_count); + + for (node = rb_first(&ubi->used); node; node = rb_next(node)) { + wl_e = rb_entry(node, struct ubi_wl_entry, u.rb); + fec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + + fec->pnum = cpu_to_be32(wl_e->pnum); + fec->ec = cpu_to_be32(wl_e->ec); + + used_peb_count++; + fm_pos += sizeof(*fec); + ubi_assert(fm_pos <= ubi->fm_size); + } + fmh->used_peb_count = cpu_to_be32(used_peb_count); + + for (node = rb_first(&ubi->scrub); node; node = rb_next(node)) { + wl_e = rb_entry(node, struct ubi_wl_entry, u.rb); + fec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + + fec->pnum = cpu_to_be32(wl_e->pnum); + fec->ec = cpu_to_be32(wl_e->ec); + + scrub_peb_count++; + fm_pos += sizeof(*fec); + ubi_assert(fm_pos <= ubi->fm_size); + } + fmh->scrub_peb_count = cpu_to_be32(scrub_peb_count); + + + list_for_each_entry(ubi_wrk, &ubi->works, list) { + if (ubi_is_erase_work(ubi_wrk)) { + wl_e = ubi_wrk->e; + ubi_assert(wl_e); + + fec = (struct ubi_fm_ec *)(fm_raw + fm_pos); + + fec->pnum = cpu_to_be32(wl_e->pnum); + fec->ec = cpu_to_be32(wl_e->ec); + + erase_peb_count++; + fm_pos += sizeof(*fec); + ubi_assert(fm_pos <= ubi->fm_size); + } + } + fmh->erase_peb_count = cpu_to_be32(erase_peb_count); + + for (i = 0; i < UBI_MAX_VOLUMES + UBI_INT_VOL_COUNT; i++) { + vol = ubi->volumes[i]; + + if (!vol) + continue; + + vol_count++; + + fvh = (struct ubi_fm_volhdr *)(fm_raw + fm_pos); + fm_pos += sizeof(*fvh); + ubi_assert(fm_pos <= ubi->fm_size); + + fvh->magic = cpu_to_be32(UBI_FM_VHDR_MAGIC); + fvh->vol_id = cpu_to_be32(vol->vol_id); + fvh->vol_type = vol->vol_type; + fvh->used_ebs = cpu_to_be32(vol->used_ebs); + fvh->data_pad = cpu_to_be32(vol->data_pad); + fvh->last_eb_bytes = cpu_to_be32(vol->last_eb_bytes); + + ubi_assert(vol->vol_type == UBI_DYNAMIC_VOLUME || + vol->vol_type == UBI_STATIC_VOLUME); + + feba = (struct ubi_fm_eba *)(fm_raw + fm_pos); + fm_pos += sizeof(*feba) + (sizeof(__be32) * vol->reserved_pebs); + ubi_assert(fm_pos <= ubi->fm_size); + + for (j = 0; j < vol->reserved_pebs; j++) + feba->pnum[j] = cpu_to_be32(vol->eba_tbl[j]); + + feba->reserved_pebs = cpu_to_be32(j); + feba->magic = cpu_to_be32(UBI_FM_EBA_MAGIC); + } + fmh->vol_count = cpu_to_be32(vol_count); + fmh->bad_peb_count = cpu_to_be32(ubi->bad_peb_count); + + avhdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + avhdr->lnum = 0; + + spin_unlock(&ubi->wl_lock); + spin_unlock(&ubi->volumes_lock); + + dbg_bld("writing fastmap SB to PEB %i", new_fm->e[0]->pnum); + ret = ubi_io_write_vid_hdr(ubi, new_fm->e[0]->pnum, avhdr); + if (ret) { + ubi_err("unable to write vid_hdr to fastmap SB!"); + goto out_kfree; + } + + for (i = 0; i < new_fm->used_blocks; i++) { + fmsb->block_loc[i] = cpu_to_be32(new_fm->e[i]->pnum); + fmsb->block_ec[i] = cpu_to_be32(new_fm->e[i]->ec); + } + + fmsb->data_crc = 0; + fmsb->data_crc = cpu_to_be32(crc32(UBI_CRC32_INIT, fm_raw, + ubi->fm_size)); + + for (i = 1; i < new_fm->used_blocks; i++) { + dvhdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + dvhdr->lnum = cpu_to_be32(i); + dbg_bld("writing fastmap data to PEB %i sqnum %llu", + new_fm->e[i]->pnum, be64_to_cpu(dvhdr->sqnum)); + ret = ubi_io_write_vid_hdr(ubi, new_fm->e[i]->pnum, dvhdr); + if (ret) { + ubi_err("unable to write vid_hdr to PEB %i!", + new_fm->e[i]->pnum); + goto out_kfree; + } + } + + for (i = 0; i < new_fm->used_blocks; i++) { + ret = ubi_io_write(ubi, fm_raw + (i * ubi->leb_size), + new_fm->e[i]->pnum, ubi->leb_start, ubi->leb_size); + if (ret) { + ubi_err("unable to write fastmap to PEB %i!", + new_fm->e[i]->pnum); + goto out_kfree; + } + } + + ubi_assert(new_fm); + ubi->fm = new_fm; + + dbg_bld("fastmap written!"); + +out_kfree: + ubi_free_vid_hdr(ubi, avhdr); + ubi_free_vid_hdr(ubi, dvhdr); +out: + return ret; +} + +/** + * erase_block - Manually erase a PEB. + * @ubi: UBI device object + * @pnum: PEB to be erased + * + * Returns the new EC value on success, < 0 indicates an internal error. + */ +static int erase_block(struct ubi_device *ubi, int pnum) +{ + int ret; + struct ubi_ec_hdr *ec_hdr; + long long ec; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + ret = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); + if (ret < 0) + goto out; + else if (ret && ret != UBI_IO_BITFLIPS) { + ret = -EINVAL; + goto out; + } + + ret = ubi_io_sync_erase(ubi, pnum, 0); + if (ret < 0) + goto out; + + ec = be64_to_cpu(ec_hdr->ec); + ec += ret; + if (ec > UBI_MAX_ERASECOUNTER) { + ret = -EINVAL; + goto out; + } + + ec_hdr->ec = cpu_to_be64(ec); + ret = ubi_io_write_ec_hdr(ubi, pnum, ec_hdr); + if (ret < 0) + goto out; + + ret = ec; +out: + kfree(ec_hdr); + return ret; +} + +/** + * invalidate_fastmap - destroys a fastmap. + * @ubi: UBI device object + * @fm: the fastmap to be destroyed + * + * Returns 0 on success, < 0 indicates an internal error. + */ +static int invalidate_fastmap(struct ubi_device *ubi, + struct ubi_fastmap_layout *fm) +{ + int ret; + struct ubi_vid_hdr *vh; + + ret = erase_block(ubi, fm->e[0]->pnum); + if (ret < 0) + return ret; + + vh = new_fm_vhdr(ubi, UBI_FM_SB_VOLUME_ID); + if (!vh) + return -ENOMEM; + + /* deleting the current fastmap SB is not enough, an old SB may exist, + * so create a (corrupted) SB such that fastmap will find it and fall + * back to scanning mode in any case */ + vh->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); + ret = ubi_io_write_vid_hdr(ubi, fm->e[0]->pnum, vh); + + return ret; +} + +/** + * ubi_update_fastmap - will be called by UBI if a volume changes or + * a fastmap pool becomes full. + * @ubi: UBI device object + * + * Returns 0 on success, < 0 indicates an internal error. + */ +int ubi_update_fastmap(struct ubi_device *ubi) +{ + int ret, i; + struct ubi_fastmap_layout *new_fm, *old_fm; + struct ubi_wl_entry *tmp_e; + + mutex_lock(&ubi->fm_mutex); + + ubi_refill_pools(ubi); + + if (ubi->ro_mode || ubi->fm_disabled) { + mutex_unlock(&ubi->fm_mutex); + return 0; + } + + ret = ubi_ensure_anchor_pebs(ubi); + if (ret) { + mutex_unlock(&ubi->fm_mutex); + return ret; + } + + new_fm = kzalloc(sizeof(*new_fm), GFP_KERNEL); + if (!new_fm) { + mutex_unlock(&ubi->fm_mutex); + return -ENOMEM; + } + + new_fm->used_blocks = ubi->fm_size / ubi->leb_size; + + for (i = 0; i < new_fm->used_blocks; i++) { + new_fm->e[i] = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!new_fm->e[i]) { + while (i--) + kfree(new_fm->e[i]); + + kfree(new_fm); + mutex_unlock(&ubi->fm_mutex); + return -ENOMEM; + } + } + + old_fm = ubi->fm; + ubi->fm = NULL; + + if (new_fm->used_blocks > UBI_FM_MAX_BLOCKS) { + ubi_err("fastmap too large"); + ret = -ENOSPC; + goto err; + } + + for (i = 1; i < new_fm->used_blocks; i++) { + spin_lock(&ubi->wl_lock); + tmp_e = ubi_wl_get_fm_peb(ubi, 0); + spin_unlock(&ubi->wl_lock); + + if (!tmp_e && !old_fm) { + int j; + ubi_err("could not get any free erase block"); + + for (j = 1; j < i; j++) + ubi_wl_put_fm_peb(ubi, new_fm->e[j], j, 0); + + ret = -ENOSPC; + goto err; + } else if (!tmp_e && old_fm) { + ret = erase_block(ubi, old_fm->e[i]->pnum); + if (ret < 0) { + int j; + + for (j = 1; j < i; j++) + ubi_wl_put_fm_peb(ubi, new_fm->e[j], + j, 0); + + ubi_err("could not erase old fastmap PEB"); + goto err; + } + + new_fm->e[i]->pnum = old_fm->e[i]->pnum; + new_fm->e[i]->ec = old_fm->e[i]->ec; + } else { + new_fm->e[i]->pnum = tmp_e->pnum; + new_fm->e[i]->ec = tmp_e->ec; + + if (old_fm) + ubi_wl_put_fm_peb(ubi, old_fm->e[i], i, + old_fm->to_be_tortured[i]); + } + } + + spin_lock(&ubi->wl_lock); + tmp_e = ubi_wl_get_fm_peb(ubi, 1); + spin_unlock(&ubi->wl_lock); + + if (old_fm) { + /* no fresh anchor PEB was found, reuse the old one */ + if (!tmp_e) { + ret = erase_block(ubi, old_fm->e[0]->pnum); + if (ret < 0) { + int i; + ubi_err("could not erase old anchor PEB"); + + for (i = 1; i < new_fm->used_blocks; i++) + ubi_wl_put_fm_peb(ubi, new_fm->e[i], + i, 0); + goto err; + } + + new_fm->e[0]->pnum = old_fm->e[0]->pnum; + new_fm->e[0]->ec = ret; + } else { + /* we've got a new anchor PEB, return the old one */ + ubi_wl_put_fm_peb(ubi, old_fm->e[0], 0, + old_fm->to_be_tortured[0]); + + new_fm->e[0]->pnum = tmp_e->pnum; + new_fm->e[0]->ec = tmp_e->ec; + } + } else { + if (!tmp_e) { + int i; + ubi_err("could not find any anchor PEB"); + + for (i = 1; i < new_fm->used_blocks; i++) + ubi_wl_put_fm_peb(ubi, new_fm->e[i], i, 0); + + ret = -ENOSPC; + goto err; + } + + new_fm->e[0]->pnum = tmp_e->pnum; + new_fm->e[0]->ec = tmp_e->ec; + } + + down_write(&ubi->work_sem); + down_write(&ubi->fm_sem); + ret = ubi_write_fastmap(ubi, new_fm); + up_write(&ubi->fm_sem); + up_write(&ubi->work_sem); + + if (ret) + goto err; + +out_unlock: + mutex_unlock(&ubi->fm_mutex); + kfree(old_fm); + return ret; + +err: + kfree(new_fm); + + ubi_warn("Unable to write new fastmap, err=%i", ret); + + ret = 0; + if (old_fm) { + ret = invalidate_fastmap(ubi, old_fm); + if (ret < 0) + ubi_err("Unable to invalidiate current fastmap!"); + else if (ret) + ret = 0; + } + goto out_unlock; +} diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 960befc..41d7eb7 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -1,22 +1,21 @@ /* * Copyright (c) International Business Machines Corp., 2006 * Copyright (c) Nokia Corporation, 2006, 2007 - * * SPDX-License-Identifier: GPL-2.0+ * * Author: Artem Bityutskiy (Битюцкий Артём) */ /* - * UBI input/output unit. + * UBI input/output sub-system. * - * This unit provides a uniform way to work with all kinds of the underlying - * MTD devices. It also implements handy functions for reading and writing UBI - * headers. + * This sub-system provides a uniform way to work with all kinds of the + * underlying MTD devices. It also implements handy functions for reading and + * writing UBI headers. * * We are trying to have a paranoid mindset and not to trust to what we read - * from the flash media in order to be more secure and robust. So this unit - * validates every single header it reads from the flash media. + * from the flash media in order to be more secure and robust. So this + * sub-system validates every single header it reads from the flash media. * * Some words about how the eraseblock headers are stored. * @@ -52,9 +51,9 @@ * device, e.g., make @ubi->min_io_size = 512 in the example above? * * A: because when writing a sub-page, MTD still writes a full 2K page but the - * bytes which are no relevant to the sub-page are 0xFF. So, basically, writing - * 4x512 sub-pages is 4 times slower then writing one 2KiB NAND page. Thus, we - * prefer to use sub-pages only for EV and VID headers. + * bytes which are not relevant to the sub-page are 0xFF. So, basically, + * writing 4x512 sub-pages is 4 times slower than writing one 2KiB NAND page. + * Thus, we prefer to use sub-pages only for EC and VID headers. * * As it was noted above, the VID header may start at a non-aligned offset. * For example, in case of a 2KiB page NAND flash with a 512 bytes sub-page, @@ -67,39 +66,33 @@ * 512-byte chunks, we have to allocate one more buffer and copy our VID header * to offset 448 of this buffer. * - * The I/O unit does the following trick in order to avoid this extra copy. - * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header - * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the - * VID header is being written out, it shifts the VID header pointer back and - * writes the whole sub-page. + * The I/O sub-system does the following trick in order to avoid this extra + * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID + * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. + * When the VID header is being written out, it shifts the VID header pointer + * back and writes the whole sub-page. */ -#ifdef UBI_LINUX +#define __UBOOT__ +#ifndef __UBOOT__ #include <linux/crc32.h> #include <linux/err.h> +#include <linux/slab.h> +#else +#include <ubi_uboot.h> #endif -#include <ubi_uboot.h> #include "ubi.h" -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID -static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum); -static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum); -static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, - const struct ubi_ec_hdr *ec_hdr); -static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum); -static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, - const struct ubi_vid_hdr *vid_hdr); -static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset, - int len); -#else -#define paranoid_check_not_bad(ubi, pnum) 0 -#define paranoid_check_peb_ec_hdr(ubi, pnum) 0 -#define paranoid_check_ec_hdr(ubi, pnum, ec_hdr) 0 -#define paranoid_check_peb_vid_hdr(ubi, pnum) 0 -#define paranoid_check_vid_hdr(ubi, pnum, vid_hdr) 0 -#define paranoid_check_all_ff(ubi, pnum, offset, len) 0 -#endif +static int self_check_not_bad(const struct ubi_device *ubi, int pnum); +static int self_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum); +static int self_check_ec_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_ec_hdr *ec_hdr); +static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum); +static int self_check_vid_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_vid_hdr *vid_hdr); +static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, + int offset, int len); /** * ubi_io_read - read data from a physical eraseblock. @@ -136,51 +129,77 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); ubi_assert(len > 0); - err = paranoid_check_not_bad(ubi, pnum); + err = self_check_not_bad(ubi, pnum); if (err) - return err > 0 ? -EINVAL : err; + return err; + + /* + * Deliberately corrupt the buffer to improve robustness. Indeed, if we + * do not do this, the following may happen: + * 1. The buffer contains data from previous operation, e.g., read from + * another PEB previously. The data looks like expected, e.g., if we + * just do not read anything and return - the caller would not + * notice this. E.g., if we are reading a VID header, the buffer may + * contain a valid VID header from another PEB. + * 2. The driver is buggy and returns us success or -EBADMSG or + * -EUCLEAN, but it does not actually put any data to the buffer. + * + * This may confuse UBI or upper layers - they may think the buffer + * contains valid data while in fact it is just old data. This is + * especially possible because UBI (and UBIFS) relies on CRC, and + * treats data as correct even in case of ECC errors if the CRC is + * correct. + * + * Try to prevent this situation by changing the first byte of the + * buffer. + */ + *((uint8_t *)buf) ^= 0xFF; addr = (loff_t)pnum * ubi->peb_size + offset; retry: err = mtd_read(ubi->mtd, addr, len, &read, buf); if (err) { - if (err == -EUCLEAN) { + const char *errstr = mtd_is_eccerr(err) ? " (ECC error)" : ""; + + if (mtd_is_bitflip(err)) { /* * -EUCLEAN is reported if there was a bit-flip which * was corrected, so this is harmless. + * + * We do not report about it here unless debugging is + * enabled. A corresponding message will be printed + * later, when it is has been scrubbed. */ ubi_msg("fixable bit-flip detected at PEB %d", pnum); ubi_assert(len == read); return UBI_IO_BITFLIPS; } - if (read != len && retries++ < UBI_IO_RETRIES) { - dbg_io("error %d while reading %d bytes from PEB %d:%d, " - "read only %zd bytes, retry", - err, len, pnum, offset, read); + if (retries++ < UBI_IO_RETRIES) { + ubi_warn("error %d%s while reading %d bytes from PEB %d:%d, read only %zd bytes, retry", + err, errstr, len, pnum, offset, read); yield(); goto retry; } - ubi_err("error %d while reading %d bytes from PEB %d:%d, " - "read %zd bytes", err, len, pnum, offset, read); - ubi_dbg_dump_stack(); + ubi_err("error %d%s while reading %d bytes from PEB %d:%d, read %zd bytes", + err, errstr, len, pnum, offset, read); + dump_stack(); /* * The driver should never return -EBADMSG if it failed to read * all the requested data. But some buggy drivers might do * this, so we change it to -EIO. */ - if (read != len && err == -EBADMSG) { + if (read != len && mtd_is_eccerr(err)) { ubi_assert(0); - printk("%s[%d] not here\n", __func__, __LINE__); -/* err = -EIO; */ + err = -EIO; } } else { ubi_assert(len == read); - if (ubi_dbg_is_bitflip()) { - dbg_msg("bit-flip (emulated)"); + if (ubi_dbg_is_bitflip(ubi)) { + dbg_gen("bit-flip (emulated)"); err = UBI_IO_BITFLIPS; } } @@ -224,46 +243,60 @@ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, return -EROFS; } - /* The below has to be compiled out if paranoid checks are disabled */ - - err = paranoid_check_not_bad(ubi, pnum); + err = self_check_not_bad(ubi, pnum); if (err) - return err > 0 ? -EINVAL : err; + return err; /* The area we are writing to has to contain all 0xFF bytes */ - err = paranoid_check_all_ff(ubi, pnum, offset, len); + err = ubi_self_check_all_ff(ubi, pnum, offset, len); if (err) - return err > 0 ? -EINVAL : err; + return err; if (offset >= ubi->leb_start) { /* * We write to the data area of the physical eraseblock. Make * sure it has valid EC and VID headers. */ - err = paranoid_check_peb_ec_hdr(ubi, pnum); + err = self_check_peb_ec_hdr(ubi, pnum); if (err) - return err > 0 ? -EINVAL : err; - err = paranoid_check_peb_vid_hdr(ubi, pnum); + return err; + err = self_check_peb_vid_hdr(ubi, pnum); if (err) - return err > 0 ? -EINVAL : err; + return err; } - if (ubi_dbg_is_write_failure()) { - dbg_err("cannot write %d bytes to PEB %d:%d " - "(emulated)", len, pnum, offset); - ubi_dbg_dump_stack(); + if (ubi_dbg_is_write_failure(ubi)) { + ubi_err("cannot write %d bytes to PEB %d:%d (emulated)", + len, pnum, offset); + dump_stack(); return -EIO; } addr = (loff_t)pnum * ubi->peb_size + offset; err = mtd_write(ubi->mtd, addr, len, &written, buf); if (err) { - ubi_err("error %d while writing %d bytes to PEB %d:%d, written" - " %zd bytes", err, len, pnum, offset, written); - ubi_dbg_dump_stack(); + ubi_err("error %d while writing %d bytes to PEB %d:%d, written %zd bytes", + err, len, pnum, offset, written); + dump_stack(); + ubi_dump_flash(ubi, pnum, offset, len); } else ubi_assert(written == len); + if (!err) { + err = self_check_write(ubi, buf, pnum, offset, len); + if (err) + return err; + + /* + * Since we always write sequentially, the rest of the PEB has + * to contain only 0xFF bytes. + */ + offset += len; + len = ubi->peb_size - offset; + if (len) + err = ubi_self_check_all_ff(ubi, pnum, offset, len); + } + return err; } @@ -295,6 +328,12 @@ static int do_sync_erase(struct ubi_device *ubi, int pnum) wait_queue_head_t wq; dbg_io("erase PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } retry: init_waitqueue_head(&wq); @@ -309,13 +348,13 @@ retry: err = mtd_erase(ubi->mtd, &ei); if (err) { if (retries++ < UBI_IO_RETRIES) { - dbg_io("error %d while erasing PEB %d, retry", - err, pnum); + ubi_warn("error %d while erasing PEB %d, retry", + err, pnum); yield(); goto retry; } ubi_err("cannot erase PEB %d, error %d", pnum, err); - ubi_dbg_dump_stack(); + dump_stack(); return err; } @@ -328,46 +367,27 @@ retry: if (ei.state == MTD_ERASE_FAILED) { if (retries++ < UBI_IO_RETRIES) { - dbg_io("error while erasing PEB %d, retry", pnum); + ubi_warn("error while erasing PEB %d, retry", pnum); yield(); goto retry; } ubi_err("cannot erase PEB %d", pnum); - ubi_dbg_dump_stack(); + dump_stack(); return -EIO; } - err = paranoid_check_all_ff(ubi, pnum, 0, ubi->peb_size); + err = ubi_self_check_all_ff(ubi, pnum, 0, ubi->peb_size); if (err) - return err > 0 ? -EINVAL : err; + return err; - if (ubi_dbg_is_erase_failure() && !err) { - dbg_err("cannot erase PEB %d (emulated)", pnum); + if (ubi_dbg_is_erase_failure(ubi)) { + ubi_err("cannot erase PEB %d (emulated)", pnum); return -EIO; } return 0; } -/** - * check_pattern - check if buffer contains only a certain byte pattern. - * @buf: buffer to check - * @patt: the pattern to check - * @size: buffer size in bytes - * - * This function returns %1 in there are only @patt bytes in @buf, and %0 if - * something else was also found. - */ -static int check_pattern(const void *buf, uint8_t patt, int size) -{ - int i; - - for (i = 0; i < size; i++) - if (((const uint8_t *)buf)[i] != patt) - return 0; - return 1; -} - /* Patterns to write to a physical eraseblock when torturing it */ static uint8_t patterns[] = {0xa5, 0x5a, 0x0}; @@ -384,6 +404,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum) { int err, i, patt_count; + ubi_msg("run torture test for PEB %d", pnum); patt_count = ARRAY_SIZE(patterns); ubi_assert(patt_count > 0); @@ -394,11 +415,11 @@ static int torture_peb(struct ubi_device *ubi, int pnum) goto out; /* Make sure the PEB contains only 0xFF bytes */ - err = ubi_io_read(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); + err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); if (err) goto out; - err = check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size); + err = ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->peb_size); if (err == 0) { ubi_err("erased PEB %d, but a non-0xFF byte found", pnum); @@ -407,17 +428,18 @@ static int torture_peb(struct ubi_device *ubi, int pnum) } /* Write a pattern and check it */ - memset(ubi->peb_buf1, patterns[i], ubi->peb_size); - err = ubi_io_write(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); + memset(ubi->peb_buf, patterns[i], ubi->peb_size); + err = ubi_io_write(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); if (err) goto out; - memset(ubi->peb_buf1, ~patterns[i], ubi->peb_size); - err = ubi_io_read(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); + memset(ubi->peb_buf, ~patterns[i], ubi->peb_size); + err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); if (err) goto out; - err = check_pattern(ubi->peb_buf1, patterns[i], ubi->peb_size); + err = ubi_check_pattern(ubi->peb_buf, patterns[i], + ubi->peb_size); if (err == 0) { ubi_err("pattern %x checking failed for PEB %d", patterns[i], pnum); @@ -427,10 +449,11 @@ static int torture_peb(struct ubi_device *ubi, int pnum) } err = patt_count; + ubi_msg("PEB %d passed torture test, do not mark it as bad", pnum); out: mutex_unlock(&ubi->buf_mutex); - if (err == UBI_IO_BITFLIPS || err == -EBADMSG) { + if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) { /* * If a bit-flip or data integrity error was detected, the test * has not passed because it happened on a freshly erased @@ -444,6 +467,80 @@ out: } /** + * nor_erase_prepare - prepare a NOR flash PEB for erasure. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to prepare + * + * NOR flash, or at least some of them, have peculiar embedded PEB erasure + * algorithm: the PEB is first filled with zeroes, then it is erased. And + * filling with zeroes starts from the end of the PEB. This was observed with + * Spansion S29GL512N NOR flash. + * + * This means that in case of a power cut we may end up with intact data at the + * beginning of the PEB, and all zeroes at the end of PEB. In other words, the + * EC and VID headers are OK, but a large chunk of data at the end of PEB is + * zeroed. This makes UBI mistakenly treat this PEB as used and associate it + * with an LEB, which leads to subsequent failures (e.g., UBIFS fails). + * + * This function is called before erasing NOR PEBs and it zeroes out EC and VID + * magic numbers in order to invalidate them and prevent the failures. Returns + * zero in case of success and a negative error code in case of failure. + */ +static int nor_erase_prepare(struct ubi_device *ubi, int pnum) +{ + int err; + size_t written; + loff_t addr; + uint32_t data = 0; + struct ubi_ec_hdr ec_hdr; + + /* + * Note, we cannot generally define VID header buffers on stack, + * because of the way we deal with these buffers (see the header + * comment in this file). But we know this is a NOR-specific piece of + * code, so we can do this. But yes, this is error-prone and we should + * (pre-)allocate VID header buffer instead. + */ + struct ubi_vid_hdr vid_hdr; + + /* + * If VID or EC is valid, we have to corrupt them before erasing. + * It is important to first invalidate the EC header, and then the VID + * header. Otherwise a power cut may lead to valid EC header and + * invalid VID header, in which case UBI will treat this PEB as + * corrupted and will try to preserve it, and print scary warnings. + */ + addr = (loff_t)pnum * ubi->peb_size; + err = ubi_io_read_ec_hdr(ubi, pnum, &ec_hdr, 0); + if (err != UBI_IO_BAD_HDR_EBADMSG && err != UBI_IO_BAD_HDR && + err != UBI_IO_FF){ + err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data); + if(err) + goto error; + } + + err = ubi_io_read_vid_hdr(ubi, pnum, &vid_hdr, 0); + if (err != UBI_IO_BAD_HDR_EBADMSG && err != UBI_IO_BAD_HDR && + err != UBI_IO_FF){ + addr += ubi->vid_hdr_aloffset; + err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data); + if (err) + goto error; + } + return 0; + +error: + /* + * The PEB contains a valid VID or EC header, but we cannot invalidate + * it. Supposedly the flash media or the driver is screwed up, so + * return an error. + */ + ubi_err("cannot invalidate PEB %d, write returned %d", pnum, err); + ubi_dump_flash(ubi, pnum, 0, ubi->peb_size); + return -EIO; +} + +/** * ubi_io_sync_erase - synchronously erase a physical eraseblock. * @ubi: UBI device description object * @pnum: physical eraseblock number to erase @@ -452,7 +549,7 @@ out: * This function synchronously erases physical eraseblock @pnum. If @torture * flag is not zero, the physical eraseblock is checked by means of writing * different patterns to it and reading them back. If the torturing is enabled, - * the physical eraseblock is erased more then once. + * the physical eraseblock is erased more than once. * * This function returns the number of erasures made in case of success, %-EIO * if the erasure failed or the torturing test failed, and other negative error @@ -465,15 +562,21 @@ int ubi_io_sync_erase(struct ubi_device *ubi, int pnum, int torture) ubi_assert(pnum >= 0 && pnum < ubi->peb_count); - err = paranoid_check_not_bad(ubi, pnum); + err = self_check_not_bad(ubi, pnum); if (err != 0) - return err > 0 ? -EINVAL : err; + return err; if (ubi->ro_mode) { ubi_err("read-only mode"); return -EROFS; } + if (ubi->nor_flash) { + err = nor_erase_prepare(ubi, pnum); + if (err) + return err; + } + if (torture) { ret = torture_peb(ubi, pnum); if (ret < 0) @@ -564,8 +667,7 @@ static int validate_ec_hdr(const struct ubi_device *ubi, leb_start = be32_to_cpu(ec_hdr->data_offset); if (ec_hdr->version != UBI_VERSION) { - ubi_err("node with incompatible UBI version found: " - "this UBI version is %d, image version is %d", + ubi_err("node with incompatible UBI version found: this UBI version is %d, image version is %d", UBI_VERSION, (int)ec_hdr->version); goto bad; } @@ -591,8 +693,8 @@ static int validate_ec_hdr(const struct ubi_device *ubi, bad: ubi_err("bad EC header"); - ubi_dbg_dump_ec_hdr(ec_hdr); - ubi_dbg_dump_stack(); + ubi_dump_ec_hdr(ec_hdr); + dump_stack(); return 1; } @@ -612,67 +714,58 @@ bad: * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected * and corrected by the flash driver; this is harmless but may indicate that * this eraseblock may become bad soon (but may be not); - * o %UBI_IO_BAD_EC_HDR if the erase counter header is corrupted (a CRC error); - * o %UBI_IO_PEB_EMPTY if the physical eraseblock is empty; + * o %UBI_IO_BAD_HDR if the erase counter header is corrupted (a CRC error); + * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was + * a data integrity error (uncorrectable ECC error in case of NAND); + * o %UBI_IO_FF if only 0xFF bytes were read (the PEB is supposedly empty) * o a negative error code in case of failure. */ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, struct ubi_ec_hdr *ec_hdr, int verbose) { - int err, read_err = 0; + int err, read_err; uint32_t crc, magic, hdr_crc; dbg_io("read EC header from PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); - if (UBI_IO_DEBUG) - verbose = 1; - err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); - if (err) { - if (err != UBI_IO_BITFLIPS && err != -EBADMSG) - return err; + read_err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); + if (read_err) { + if (read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) + return read_err; /* * We read all the data, but either a correctable bit-flip - * occurred, or MTD reported about some data integrity error, - * like an ECC error in case of NAND. The former is harmless, - * the later may mean that the read data is corrupted. But we - * have a CRC check-sum and we will detect this. If the EC - * header is still OK, we just report this as there was a - * bit-flip. + * occurred, or MTD reported a data integrity error + * (uncorrectable ECC error in case of NAND). The former is + * harmless, the later may mean that the read data is + * corrupted. But we have a CRC check-sum and we will detect + * this. If the EC header is still OK, we just report this as + * there was a bit-flip, to force scrubbing. */ - read_err = err; } magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { + if (mtd_is_eccerr(read_err)) + return UBI_IO_BAD_HDR_EBADMSG; + /* * The magic field is wrong. Let's check if we have read all * 0xFF. If yes, this physical eraseblock is assumed to be * empty. - * - * But if there was a read error, we do not test it for all - * 0xFFs. Even if it does contain all 0xFFs, this error - * indicates that something is still wrong with this physical - * eraseblock and we anyway cannot treat it as empty. */ - if (read_err != -EBADMSG && - check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { + if (ubi_check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { /* The physical eraseblock is supposedly empty */ - - /* - * The below is just a paranoid check, it has to be - * compiled out if paranoid checks are disabled. - */ - err = paranoid_check_all_ff(ubi, pnum, 0, - ubi->peb_size); - if (err) - return err > 0 ? UBI_IO_BAD_EC_HDR : err; - if (verbose) - ubi_warn("no EC header found at PEB %d, " - "only 0xFF bytes", pnum); - return UBI_IO_PEB_EMPTY; + ubi_warn("no EC header found at PEB %d, only 0xFF bytes", + pnum); + dbg_bld("no EC header found at PEB %d, only 0xFF bytes", + pnum); + if (!read_err) + return UBI_IO_FF; + else + return UBI_IO_FF_BITFLIPS; } /* @@ -680,11 +773,13 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, * 0xFF bytes. Report that the header is corrupted. */ if (verbose) { - ubi_warn("bad magic number at PEB %d: %08x instead of " - "%08x", pnum, magic, UBI_EC_HDR_MAGIC); - ubi_dbg_dump_ec_hdr(ec_hdr); + ubi_warn("bad magic number at PEB %d: %08x instead of %08x", + pnum, magic, UBI_EC_HDR_MAGIC); + ubi_dump_ec_hdr(ec_hdr); } - return UBI_IO_BAD_EC_HDR; + dbg_bld("bad magic number at PEB %d: %08x instead of %08x", + pnum, magic, UBI_EC_HDR_MAGIC); + return UBI_IO_BAD_HDR; } crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); @@ -692,11 +787,17 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, if (hdr_crc != crc) { if (verbose) { - ubi_warn("bad EC header CRC at PEB %d, calculated %#08x," - " read %#08x", pnum, crc, hdr_crc); - ubi_dbg_dump_ec_hdr(ec_hdr); + ubi_warn("bad EC header CRC at PEB %d, calculated %#08x, read %#08x", + pnum, crc, hdr_crc); + ubi_dump_ec_hdr(ec_hdr); } - return UBI_IO_BAD_EC_HDR; + dbg_bld("bad EC header CRC at PEB %d, calculated %#08x, read %#08x", + pnum, crc, hdr_crc); + + if (!read_err) + return UBI_IO_BAD_HDR; + else + return UBI_IO_BAD_HDR_EBADMSG; } /* And of course validate what has just been read from the media */ @@ -706,6 +807,10 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, return -EINVAL; } + /* + * If there was %-EBADMSG, but the header CRC is still OK, report about + * a bit-flip to force scrubbing on this PEB. + */ return read_err ? UBI_IO_BITFLIPS : 0; } @@ -737,12 +842,13 @@ int ubi_io_write_ec_hdr(struct ubi_device *ubi, int pnum, ec_hdr->version = UBI_VERSION; ec_hdr->vid_hdr_offset = cpu_to_be32(ubi->vid_hdr_offset); ec_hdr->data_offset = cpu_to_be32(ubi->leb_start); + ec_hdr->image_seq = cpu_to_be32(ubi->image_seq); crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); ec_hdr->hdr_crc = cpu_to_be32(crc); - err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr); + err = self_check_ec_hdr(ubi, pnum, ec_hdr); if (err) - return -EINVAL; + return err; err = ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize); return err; @@ -771,40 +877,40 @@ static int validate_vid_hdr(const struct ubi_device *ubi, int usable_leb_size = ubi->leb_size - data_pad; if (copy_flag != 0 && copy_flag != 1) { - dbg_err("bad copy_flag"); + ubi_err("bad copy_flag"); goto bad; } if (vol_id < 0 || lnum < 0 || data_size < 0 || used_ebs < 0 || data_pad < 0) { - dbg_err("negative values"); + ubi_err("negative values"); goto bad; } if (vol_id >= UBI_MAX_VOLUMES && vol_id < UBI_INTERNAL_VOL_START) { - dbg_err("bad vol_id"); + ubi_err("bad vol_id"); goto bad; } if (vol_id < UBI_INTERNAL_VOL_START && compat != 0) { - dbg_err("bad compat"); + ubi_err("bad compat"); goto bad; } if (vol_id >= UBI_INTERNAL_VOL_START && compat != UBI_COMPAT_DELETE && compat != UBI_COMPAT_RO && compat != UBI_COMPAT_PRESERVE && compat != UBI_COMPAT_REJECT) { - dbg_err("bad compat"); + ubi_err("bad compat"); goto bad; } if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { - dbg_err("bad vol_type"); + ubi_err("bad vol_type"); goto bad; } if (data_pad >= ubi->leb_size / 2) { - dbg_err("bad data_pad"); + ubi_err("bad data_pad"); goto bad; } @@ -816,45 +922,45 @@ static int validate_vid_hdr(const struct ubi_device *ubi, * mapped logical eraseblocks. */ if (used_ebs == 0) { - dbg_err("zero used_ebs"); + ubi_err("zero used_ebs"); goto bad; } if (data_size == 0) { - dbg_err("zero data_size"); + ubi_err("zero data_size"); goto bad; } if (lnum < used_ebs - 1) { if (data_size != usable_leb_size) { - dbg_err("bad data_size"); + ubi_err("bad data_size"); goto bad; } } else if (lnum == used_ebs - 1) { if (data_size == 0) { - dbg_err("bad data_size at last LEB"); + ubi_err("bad data_size at last LEB"); goto bad; } } else { - dbg_err("too high lnum"); + ubi_err("too high lnum"); goto bad; } } else { if (copy_flag == 0) { if (data_crc != 0) { - dbg_err("non-zero data CRC"); + ubi_err("non-zero data CRC"); goto bad; } if (data_size != 0) { - dbg_err("non-zero data_size"); + ubi_err("non-zero data_size"); goto bad; } } else { if (data_size == 0) { - dbg_err("zero data_size of copy"); + ubi_err("zero data_size of copy"); goto bad; } } if (used_ebs != 0) { - dbg_err("bad used_ebs"); + ubi_err("bad used_ebs"); goto bad; } } @@ -863,8 +969,8 @@ static int validate_vid_hdr(const struct ubi_device *ubi, bad: ubi_err("bad VID header"); - ubi_dbg_dump_vid_hdr(vid_hdr); - ubi_dbg_dump_stack(); + ubi_dump_vid_hdr(vid_hdr); + dump_stack(); return 1; } @@ -878,88 +984,53 @@ bad: * * This function reads the volume identifier header from physical eraseblock * @pnum and stores it in @vid_hdr. It also checks CRC checksum of the read - * volume identifier header. The following codes may be returned: + * volume identifier header. The error codes are the same as in + * 'ubi_io_read_ec_hdr()'. * - * o %0 if the CRC checksum is correct and the header was successfully read; - * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected - * and corrected by the flash driver; this is harmless but may indicate that - * this eraseblock may become bad soon; - * o %UBI_IO_BAD_VID_HRD if the volume identifier header is corrupted (a CRC - * error detected); - * o %UBI_IO_PEB_FREE if the physical eraseblock is free (i.e., there is no VID - * header there); - * o a negative error code in case of failure. + * Note, the implementation of this function is also very similar to + * 'ubi_io_read_ec_hdr()', so refer commentaries in 'ubi_io_read_ec_hdr()'. */ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, struct ubi_vid_hdr *vid_hdr, int verbose) { - int err, read_err = 0; + int err, read_err; uint32_t crc, magic, hdr_crc; void *p; dbg_io("read VID header from PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); - if (UBI_IO_DEBUG) - verbose = 1; p = (char *)vid_hdr - ubi->vid_hdr_shift; - err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, + read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, ubi->vid_hdr_alsize); - if (err) { - if (err != UBI_IO_BITFLIPS && err != -EBADMSG) - return err; - - /* - * We read all the data, but either a correctable bit-flip - * occurred, or MTD reported about some data integrity error, - * like an ECC error in case of NAND. The former is harmless, - * the later may mean the read data is corrupted. But we have a - * CRC check-sum and we will identify this. If the VID header is - * still OK, we just report this as there was a bit-flip. - */ - read_err = err; - } + if (read_err && read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) + return read_err; magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { - /* - * If we have read all 0xFF bytes, the VID header probably does - * not exist and the physical eraseblock is assumed to be free. - * - * But if there was a read error, we do not test the data for - * 0xFFs. Even if it does contain all 0xFFs, this error - * indicates that something is still wrong with this physical - * eraseblock and it cannot be regarded as free. - */ - if (read_err != -EBADMSG && - check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { - /* The physical eraseblock is supposedly free */ - - /* - * The below is just a paranoid check, it has to be - * compiled out if paranoid checks are disabled. - */ - err = paranoid_check_all_ff(ubi, pnum, ubi->leb_start, - ubi->leb_size); - if (err) - return err > 0 ? UBI_IO_BAD_VID_HDR : err; + if (mtd_is_eccerr(read_err)) + return UBI_IO_BAD_HDR_EBADMSG; + if (ubi_check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { if (verbose) - ubi_warn("no VID header found at PEB %d, " - "only 0xFF bytes", pnum); - return UBI_IO_PEB_FREE; + ubi_warn("no VID header found at PEB %d, only 0xFF bytes", + pnum); + dbg_bld("no VID header found at PEB %d, only 0xFF bytes", + pnum); + if (!read_err) + return UBI_IO_FF; + else + return UBI_IO_FF_BITFLIPS; } - /* - * This is not a valid VID header, and these are not 0xFF - * bytes. Report that the header is corrupted. - */ if (verbose) { - ubi_warn("bad magic number at PEB %d: %08x instead of " - "%08x", pnum, magic, UBI_VID_HDR_MAGIC); - ubi_dbg_dump_vid_hdr(vid_hdr); + ubi_warn("bad magic number at PEB %d: %08x instead of %08x", + pnum, magic, UBI_VID_HDR_MAGIC); + ubi_dump_vid_hdr(vid_hdr); } - return UBI_IO_BAD_VID_HDR; + dbg_bld("bad magic number at PEB %d: %08x instead of %08x", + pnum, magic, UBI_VID_HDR_MAGIC); + return UBI_IO_BAD_HDR; } crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); @@ -967,14 +1038,18 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, if (hdr_crc != crc) { if (verbose) { - ubi_warn("bad CRC at PEB %d, calculated %#08x, " - "read %#08x", pnum, crc, hdr_crc); - ubi_dbg_dump_vid_hdr(vid_hdr); + ubi_warn("bad CRC at PEB %d, calculated %#08x, read %#08x", + pnum, crc, hdr_crc); + ubi_dump_vid_hdr(vid_hdr); } - return UBI_IO_BAD_VID_HDR; + dbg_bld("bad CRC at PEB %d, calculated %#08x, read %#08x", + pnum, crc, hdr_crc); + if (!read_err) + return UBI_IO_BAD_HDR; + else + return UBI_IO_BAD_HDR_EBADMSG; } - /* Validate the VID header that we have just read */ err = validate_vid_hdr(ubi, vid_hdr); if (err) { ubi_err("validation failed for PEB %d", pnum); @@ -1009,18 +1084,18 @@ int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, dbg_io("write VID header to PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); - err = paranoid_check_peb_ec_hdr(ubi, pnum); + err = self_check_peb_ec_hdr(ubi, pnum); if (err) - return err > 0 ? -EINVAL: err; + return err; vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); vid_hdr->version = UBI_VERSION; crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); vid_hdr->hdr_crc = cpu_to_be32(crc); - err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr); + err = self_check_vid_hdr(ubi, pnum, vid_hdr); if (err) - return -EINVAL; + return err; p = (char *)vid_hdr - ubi->vid_hdr_shift; err = ubi_io_write(ubi, p, pnum, ubi->vid_hdr_aloffset, @@ -1028,44 +1103,48 @@ int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, return err; } -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID - /** - * paranoid_check_not_bad - ensure that a physical eraseblock is not bad. + * self_check_not_bad - ensure that a physical eraseblock is not bad. * @ubi: UBI device description object * @pnum: physical eraseblock number to check * - * This function returns zero if the physical eraseblock is good, a positive - * number if it is bad and a negative error code if an error occurred. + * This function returns zero if the physical eraseblock is good, %-EINVAL if + * it is bad and a negative error code if an error occurred. */ -static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum) +static int self_check_not_bad(const struct ubi_device *ubi, int pnum) { int err; + if (!ubi_dbg_chk_io(ubi)) + return 0; + err = ubi_io_is_bad(ubi, pnum); if (!err) return err; - ubi_err("paranoid check failed for PEB %d", pnum); - ubi_dbg_dump_stack(); - return err; + ubi_err("self-check failed for PEB %d", pnum); + dump_stack(); + return err > 0 ? -EINVAL : err; } /** - * paranoid_check_ec_hdr - check if an erase counter header is all right. + * self_check_ec_hdr - check if an erase counter header is all right. * @ubi: UBI device description object * @pnum: physical eraseblock number the erase counter header belongs to * @ec_hdr: the erase counter header to check * * This function returns zero if the erase counter header contains valid - * values, and %1 if not. + * values, and %-EINVAL if not. */ -static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, - const struct ubi_ec_hdr *ec_hdr) +static int self_check_ec_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_ec_hdr *ec_hdr) { int err; uint32_t magic; + if (!ubi_dbg_chk_io(ubi)) + return 0; + magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { ubi_err("bad magic %#08x, must be %#08x", @@ -1075,53 +1154,55 @@ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, err = validate_ec_hdr(ubi, ec_hdr); if (err) { - ubi_err("paranoid check failed for PEB %d", pnum); + ubi_err("self-check failed for PEB %d", pnum); goto fail; } return 0; fail: - ubi_dbg_dump_ec_hdr(ec_hdr); - ubi_dbg_dump_stack(); - return 1; + ubi_dump_ec_hdr(ec_hdr); + dump_stack(); + return -EINVAL; } /** - * paranoid_check_peb_ec_hdr - check that the erase counter header of a - * physical eraseblock is in-place and is all right. + * self_check_peb_ec_hdr - check erase counter header. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * - * This function returns zero if the erase counter header is all right, %1 if - * not, and a negative error code if an error occurred. + * This function returns zero if the erase counter header is all right and and + * a negative error code if not or if an error occurred. */ -static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) +static int self_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) { int err; uint32_t crc, hdr_crc; struct ubi_ec_hdr *ec_hdr; + if (!ubi_dbg_chk_io(ubi)) + return 0; + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); if (!ec_hdr) return -ENOMEM; err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); - if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) + if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) goto exit; crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); if (hdr_crc != crc) { ubi_err("bad CRC, calculated %#08x, read %#08x", crc, hdr_crc); - ubi_err("paranoid check failed for PEB %d", pnum); - ubi_dbg_dump_ec_hdr(ec_hdr); - ubi_dbg_dump_stack(); - err = 1; + ubi_err("self-check failed for PEB %d", pnum); + ubi_dump_ec_hdr(ec_hdr); + dump_stack(); + err = -EINVAL; goto exit; } - err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr); + err = self_check_ec_hdr(ubi, pnum, ec_hdr); exit: kfree(ec_hdr); @@ -1129,20 +1210,23 @@ exit: } /** - * paranoid_check_vid_hdr - check that a volume identifier header is all right. + * self_check_vid_hdr - check that a volume identifier header is all right. * @ubi: UBI device description object * @pnum: physical eraseblock number the volume identifier header belongs to * @vid_hdr: the volume identifier header to check * * This function returns zero if the volume identifier header is all right, and - * %1 if not. + * %-EINVAL if not. */ -static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, - const struct ubi_vid_hdr *vid_hdr) +static int self_check_vid_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_vid_hdr *vid_hdr) { int err; uint32_t magic; + if (!ubi_dbg_chk_io(ubi)) + return 0; + magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x", @@ -1152,36 +1236,38 @@ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, err = validate_vid_hdr(ubi, vid_hdr); if (err) { - ubi_err("paranoid check failed for PEB %d", pnum); + ubi_err("self-check failed for PEB %d", pnum); goto fail; } return err; fail: - ubi_err("paranoid check failed for PEB %d", pnum); - ubi_dbg_dump_vid_hdr(vid_hdr); - ubi_dbg_dump_stack(); - return 1; + ubi_err("self-check failed for PEB %d", pnum); + ubi_dump_vid_hdr(vid_hdr); + dump_stack(); + return -EINVAL; } /** - * paranoid_check_peb_vid_hdr - check that the volume identifier header of a - * physical eraseblock is in-place and is all right. + * self_check_peb_vid_hdr - check volume identifier header. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * * This function returns zero if the volume identifier header is all right, - * %1 if not, and a negative error code if an error occurred. + * and a negative error code if not or if an error occurred. */ -static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) +static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) { int err; uint32_t crc, hdr_crc; struct ubi_vid_hdr *vid_hdr; void *p; + if (!ubi_dbg_chk_io(ubi)) + return 0; + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); if (!vid_hdr) return -ENOMEM; @@ -1189,22 +1275,22 @@ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) p = (char *)vid_hdr - ubi->vid_hdr_shift; err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, ubi->vid_hdr_alsize); - if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) + if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) goto exit; crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_EC_HDR_SIZE_CRC); hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); if (hdr_crc != crc) { - ubi_err("bad VID header CRC at PEB %d, calculated %#08x, " - "read %#08x", pnum, crc, hdr_crc); - ubi_err("paranoid check failed for PEB %d", pnum); - ubi_dbg_dump_vid_hdr(vid_hdr); - ubi_dbg_dump_stack(); - err = 1; + ubi_err("bad VID header CRC at PEB %d, calculated %#08x, read %#08x", + pnum, crc, hdr_crc); + ubi_err("self-check failed for PEB %d", pnum); + ubi_dump_vid_hdr(vid_hdr); + dump_stack(); + err = -EINVAL; goto exit; } - err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr); + err = self_check_vid_hdr(ubi, pnum, vid_hdr); exit: ubi_free_vid_hdr(ubi, vid_hdr); @@ -1212,51 +1298,123 @@ exit: } /** - * paranoid_check_all_ff - check that a region of flash is empty. + * self_check_write - make sure write succeeded. + * @ubi: UBI device description object + * @buf: buffer with data which were written + * @pnum: physical eraseblock number the data were written to + * @offset: offset within the physical eraseblock the data were written to + * @len: how many bytes were written + * + * This functions reads data which were recently written and compares it with + * the original data buffer - the data have to match. Returns zero if the data + * match and a negative error code if not or in case of failure. + */ +static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, + int offset, int len) +{ + int err, i; + size_t read; + void *buf1; + loff_t addr = (loff_t)pnum * ubi->peb_size + offset; + + if (!ubi_dbg_chk_io(ubi)) + return 0; + + buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); + if (!buf1) { + ubi_err("cannot allocate memory to check writes"); + return 0; + } + + err = mtd_read(ubi->mtd, addr, len, &read, buf1); + if (err && !mtd_is_bitflip(err)) + goto out_free; + + for (i = 0; i < len; i++) { + uint8_t c = ((uint8_t *)buf)[i]; + uint8_t c1 = ((uint8_t *)buf1)[i]; +#if !defined(CONFIG_UBI_SILENCE_MSG) + int dump_len = max_t(int, 128, len - i); +#endif + + if (c == c1) + continue; + + ubi_err("self-check failed for PEB %d:%d, len %d", + pnum, offset, len); + ubi_msg("data differ at position %d", i); + ubi_msg("hex dump of the original buffer from %d to %d", + i, i + dump_len); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + buf + i, dump_len, 1); + ubi_msg("hex dump of the read buffer from %d to %d", + i, i + dump_len); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + buf1 + i, dump_len, 1); + dump_stack(); + err = -EINVAL; + goto out_free; + } + + vfree(buf1); + return 0; + +out_free: + vfree(buf1); + return err; +} + +/** + * ubi_self_check_all_ff - check that a region of flash is empty. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * @offset: the starting offset within the physical eraseblock to check * @len: the length of the region to check * * This function returns zero if only 0xFF bytes are present at offset - * @offset of the physical eraseblock @pnum, %1 if not, and a negative error - * code if an error occurred. + * @offset of the physical eraseblock @pnum, and a negative error code if not + * or if an error occurred. */ -static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset, - int len) +int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) { size_t read; int err; + void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - mutex_lock(&ubi->dbg_buf_mutex); - err = mtd_read(ubi->mtd, addr, len, &read, ubi->dbg_peb_buf); - if (err && err != -EUCLEAN) { - ubi_err("error %d while reading %d bytes from PEB %d:%d, " - "read %zd bytes", err, len, pnum, offset, read); + if (!ubi_dbg_chk_io(ubi)) + return 0; + + buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); + if (!buf) { + ubi_err("cannot allocate memory to check for 0xFFs"); + return 0; + } + + err = mtd_read(ubi->mtd, addr, len, &read, buf); + if (err && !mtd_is_bitflip(err)) { + ubi_err("error %d while reading %d bytes from PEB %d:%d, read %zd bytes", + err, len, pnum, offset, read); goto error; } - err = check_pattern(ubi->dbg_peb_buf, 0xFF, len); + err = ubi_check_pattern(buf, 0xFF, len); if (err == 0) { - ubi_err("flash region at PEB %d:%d, length %d does not " - "contain all 0xFF bytes", pnum, offset, len); + ubi_err("flash region at PEB %d:%d, length %d does not contain all 0xFF bytes", + pnum, offset, len); goto fail; } - mutex_unlock(&ubi->dbg_buf_mutex); + vfree(buf); return 0; fail: - ubi_err("paranoid check failed for PEB %d", pnum); - dbg_msg("hex dump of the %d-%d region", offset, offset + len); - print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, - ubi->dbg_peb_buf, len, 1); - err = 1; + ubi_err("self-check failed for PEB %d", pnum); + ubi_msg("hex dump of the %d-%d region", offset, offset + len); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); + err = -EINVAL; error: - ubi_dbg_dump_stack(); - mutex_unlock(&ubi->dbg_buf_mutex); + dump_stack(); + vfree(buf); return err; } - -#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 63c56c9..0183c93 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -8,16 +8,43 @@ /* This file mostly implements UBI kernel API functions */ -#ifdef UBI_LINUX +#define __UBOOT__ +#ifndef __UBOOT__ #include <linux/module.h> -#include <linux/err.h> +#include <linux/slab.h> +#include <linux/namei.h> +#include <linux/fs.h> #include <asm/div64.h> +#else +#include <ubi_uboot.h> #endif +#include <linux/err.h> -#include <ubi_uboot.h> #include "ubi.h" /** + * ubi_do_get_device_info - get information about UBI device. + * @ubi: UBI device description object + * @di: the information is stored here + * + * This function is the same as 'ubi_get_device_info()', but it assumes the UBI + * device is locked and cannot disappear. + */ +void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di) +{ + di->ubi_num = ubi->ubi_num; + di->leb_size = ubi->leb_size; + di->leb_start = ubi->leb_start; + di->min_io_size = ubi->min_io_size; + di->max_write_size = ubi->max_write_size; + di->ro_mode = ubi->ro_mode; +#ifndef __UBOOT__ + di->cdev = ubi->cdev.dev; +#endif +} +EXPORT_SYMBOL_GPL(ubi_do_get_device_info); + +/** * ubi_get_device_info - get information about UBI device. * @ubi_num: UBI device number * @di: the information is stored here @@ -31,33 +58,24 @@ int ubi_get_device_info(int ubi_num, struct ubi_device_info *di) if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) return -EINVAL; - ubi = ubi_get_device(ubi_num); if (!ubi) return -ENODEV; - - di->ubi_num = ubi->ubi_num; - di->leb_size = ubi->leb_size; - di->min_io_size = ubi->min_io_size; - di->ro_mode = ubi->ro_mode; - di->cdev = ubi->cdev.dev; - + ubi_do_get_device_info(ubi, di); ubi_put_device(ubi); return 0; } EXPORT_SYMBOL_GPL(ubi_get_device_info); /** - * ubi_get_volume_info - get information about UBI volume. - * @desc: volume descriptor + * ubi_do_get_volume_info - get information about UBI volume. + * @ubi: UBI device description object + * @vol: volume description object * @vi: the information is stored here */ -void ubi_get_volume_info(struct ubi_volume_desc *desc, - struct ubi_volume_info *vi) +void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol, + struct ubi_volume_info *vi) { - const struct ubi_volume *vol = desc->vol; - const struct ubi_device *ubi = vol->ubi; - vi->vol_id = vol->vol_id; vi->ubi_num = ubi->ubi_num; vi->size = vol->reserved_pebs; @@ -71,6 +89,17 @@ void ubi_get_volume_info(struct ubi_volume_desc *desc, vi->name = vol->name; vi->cdev = vol->cdev.dev; } + +/** + * ubi_get_volume_info - get information about UBI volume. + * @desc: volume descriptor + * @vi: the information is stored here + */ +void ubi_get_volume_info(struct ubi_volume_desc *desc, + struct ubi_volume_info *vi) +{ + ubi_do_get_volume_info(desc->vol->ubi, desc->vol, vi); +} EXPORT_SYMBOL_GPL(ubi_get_volume_info); /** @@ -98,7 +127,7 @@ struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode) struct ubi_device *ubi; struct ubi_volume *vol; - dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode); + dbg_gen("open device %d, volume %d, mode %d", ubi_num, vol_id, mode); if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) return ERR_PTR(-EINVAL); @@ -188,6 +217,8 @@ out_free: kfree(desc); out_put_ubi: ubi_put_device(ubi); + ubi_err("cannot open device %d, volume %d, error %d", + ubi_num, vol_id, err); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(ubi_open_volume); @@ -207,7 +238,7 @@ struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name, struct ubi_device *ubi; struct ubi_volume_desc *ret; - dbg_msg("open volume %s, mode %d", name, mode); + dbg_gen("open device %d, volume %s, mode %d", ubi_num, name, mode); if (!name) return ERR_PTR(-EINVAL); @@ -249,6 +280,45 @@ struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name, } EXPORT_SYMBOL_GPL(ubi_open_volume_nm); +#ifndef __UBOOT__ +/** + * ubi_open_volume_path - open UBI volume by its character device node path. + * @pathname: volume character device node path + * @mode: open mode + * + * This function is similar to 'ubi_open_volume()', but opens a volume the path + * to its character device node. + */ +struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) +{ + int error, ubi_num, vol_id, mod; + struct inode *inode; + struct path path; + + dbg_gen("open volume %s, mode %d", pathname, mode); + + if (!pathname || !*pathname) + return ERR_PTR(-EINVAL); + + error = kern_path(pathname, LOOKUP_FOLLOW, &path); + if (error) + return ERR_PTR(error); + + inode = path.dentry->d_inode; + mod = inode->i_mode; + ubi_num = ubi_major2num(imajor(inode)); + vol_id = iminor(inode) - 1; + path_put(&path); + + if (!S_ISCHR(mod)) + return ERR_PTR(-EINVAL); + if (vol_id >= 0 && ubi_num >= 0) + return ubi_open_volume(ubi_num, vol_id, mode); + return ERR_PTR(-ENODEV); +} +EXPORT_SYMBOL_GPL(ubi_open_volume_path); +#endif + /** * ubi_close_volume - close UBI volume. * @desc: volume descriptor @@ -258,7 +328,8 @@ void ubi_close_volume(struct ubi_volume_desc *desc) struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; - dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode); + dbg_gen("close device %d, volume %d, mode %d", + ubi->ubi_num, vol->vol_id, desc->mode); spin_lock(&ubi->volumes_lock); switch (desc->mode) { @@ -315,7 +386,7 @@ int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, struct ubi_device *ubi = vol->ubi; int err, vol_id = vol->vol_id; - dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); + dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 || lnum >= vol->used_ebs || offset < 0 || len < 0 || @@ -353,11 +424,9 @@ EXPORT_SYMBOL_GPL(ubi_leb_read); * @buf: data to write * @offset: offset within the logical eraseblock where to write * @len: how many bytes to write - * @dtype: expected data type * * This function writes @len bytes of data from @buf to offset @offset of - * logical eraseblock @lnum. The @dtype argument describes expected lifetime of - * the data. + * logical eraseblock @lnum. * * This function takes care of physical eraseblock write failures. If write to * the physical eraseblock write operation fails, the logical eraseblock is @@ -374,13 +443,13 @@ EXPORT_SYMBOL_GPL(ubi_leb_read); * returns immediately with %-EBADF code. */ int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, - int offset, int len, int dtype) + int offset, int len) { struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; int vol_id = vol->vol_id; - dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); + dbg_gen("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); if (vol_id < 0 || vol_id >= ubi->vtbl_slots) return -EINVAL; @@ -393,17 +462,13 @@ int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1)) return -EINVAL; - if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && - dtype != UBI_UNKNOWN) - return -EINVAL; - if (vol->upd_marker) return -EBADF; if (len == 0) return 0; - return ubi_eba_write_leb(ubi, vol, lnum, buf, offset, len, dtype); + return ubi_eba_write_leb(ubi, vol, lnum, buf, offset, len); } EXPORT_SYMBOL_GPL(ubi_leb_write); @@ -413,24 +478,23 @@ EXPORT_SYMBOL_GPL(ubi_leb_write); * @lnum: logical eraseblock number to change * @buf: data to write * @len: how many bytes to write - * @dtype: expected data type * * This function changes the contents of a logical eraseblock atomically. @buf * has to contain new logical eraseblock data, and @len - the length of the - * data, which has to be aligned. The length may be shorter then the logical + * data, which has to be aligned. The length may be shorter than the logical * eraseblock size, ant the logical eraseblock may be appended to more times * later on. This function guarantees that in case of an unclean reboot the old * contents is preserved. Returns zero in case of success and a negative error * code in case of failure. */ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, - int len, int dtype) + int len) { struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; int vol_id = vol->vol_id; - dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); + dbg_gen("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); if (vol_id < 0 || vol_id >= ubi->vtbl_slots) return -EINVAL; @@ -442,17 +506,13 @@ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, len > vol->usable_leb_size || len & (ubi->min_io_size - 1)) return -EINVAL; - if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && - dtype != UBI_UNKNOWN) - return -EINVAL; - if (vol->upd_marker) return -EBADF; if (len == 0) return 0; - return ubi_eba_atomic_leb_change(ubi, vol, lnum, buf, len, dtype); + return ubi_eba_atomic_leb_change(ubi, vol, lnum, buf, len); } EXPORT_SYMBOL_GPL(ubi_leb_change); @@ -474,7 +534,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum) struct ubi_device *ubi = vol->ubi; int err; - dbg_msg("erase LEB %d:%d", vol->vol_id, lnum); + dbg_gen("erase LEB %d:%d", vol->vol_id, lnum); if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) return -EROFS; @@ -489,7 +549,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum) if (err) return err; - return ubi_wl_flush(ubi); + return ubi_wl_flush(ubi, vol->vol_id, lnum); } EXPORT_SYMBOL_GPL(ubi_leb_erase); @@ -500,7 +560,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_erase); * * This function un-maps logical eraseblock @lnum and schedules the * corresponding physical eraseblock for erasure, so that it will eventually be - * physically erased in background. This operation is much faster then the + * physically erased in background. This operation is much faster than the * erase operation. * * Unlike erase, the un-map operation does not guarantee that the logical @@ -519,7 +579,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_erase); * * The main and obvious use-case of this function is when the contents of a * logical eraseblock has to be re-written. Then it is much more efficient to - * first un-map it, then write new data, rather then first erase it, then write + * first un-map it, then write new data, rather than first erase it, then write * new data. Note, once new data has been written to the logical eraseblock, * UBI guarantees that the old contents has gone forever. In other words, if an * unclean reboot happens after the logical eraseblock has been un-mapped and @@ -534,7 +594,7 @@ int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum) struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; - dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum); + dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum); if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) return -EROFS; @@ -550,13 +610,12 @@ int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum) EXPORT_SYMBOL_GPL(ubi_leb_unmap); /** - * ubi_leb_map - map logical erasblock to a physical eraseblock. + * ubi_leb_map - map logical eraseblock to a physical eraseblock. * @desc: volume descriptor * @lnum: logical eraseblock number - * @dtype: expected data type * * This function maps an un-mapped logical eraseblock @lnum to a physical - * eraseblock. This means, that after a successfull invocation of this + * eraseblock. This means, that after a successful invocation of this * function the logical eraseblock @lnum will be empty (contain only %0xFF * bytes) and be mapped to a physical eraseblock, even if an unclean reboot * happens. @@ -566,12 +625,12 @@ EXPORT_SYMBOL_GPL(ubi_leb_unmap); * eraseblock is already mapped, and other negative error codes in case of * other failures. */ -int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) +int ubi_leb_map(struct ubi_volume_desc *desc, int lnum) { struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; - dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum); + dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum); if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) return -EROFS; @@ -579,17 +638,13 @@ int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) if (lnum < 0 || lnum >= vol->reserved_pebs) return -EINVAL; - if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && - dtype != UBI_UNKNOWN) - return -EINVAL; - if (vol->upd_marker) return -EBADF; if (vol->eba_tbl[lnum] >= 0) return -EBADMSG; - return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0, dtype); + return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0); } EXPORT_SYMBOL_GPL(ubi_leb_map); @@ -613,7 +668,7 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum) { struct ubi_volume *vol = desc->vol; - dbg_msg("test LEB %d:%d", vol->vol_id, lnum); + dbg_gen("test LEB %d:%d", vol->vol_id, lnum); if (lnum < 0 || lnum >= vol->reserved_pebs) return -EINVAL; @@ -624,3 +679,110 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum) return vol->eba_tbl[lnum] >= 0; } EXPORT_SYMBOL_GPL(ubi_is_mapped); + +/** + * ubi_sync - synchronize UBI device buffers. + * @ubi_num: UBI device to synchronize + * + * The underlying MTD device may cache data in hardware or in software. This + * function ensures the caches are flushed. Returns zero in case of success and + * a negative error code in case of failure. + */ +int ubi_sync(int ubi_num) +{ + struct ubi_device *ubi; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + + mtd_sync(ubi->mtd); + ubi_put_device(ubi); + return 0; +} +EXPORT_SYMBOL_GPL(ubi_sync); + +/** + * ubi_flush - flush UBI work queue. + * @ubi_num: UBI device to flush work queue + * @vol_id: volume id to flush for + * @lnum: logical eraseblock number to flush for + * + * This function executes all pending works for a particular volume id / logical + * eraseblock number pair. If either value is set to %UBI_ALL, then it acts as + * a wildcard for all of the corresponding volume numbers or logical + * eraseblock numbers. It returns zero in case of success and a negative error + * code in case of failure. + */ +int ubi_flush(int ubi_num, int vol_id, int lnum) +{ + struct ubi_device *ubi; + int err = 0; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + + err = ubi_wl_flush(ubi, vol_id, lnum); + ubi_put_device(ubi); + return err; +} +EXPORT_SYMBOL_GPL(ubi_flush); + +#ifndef __UBOOT__ +BLOCKING_NOTIFIER_HEAD(ubi_notifiers); + +/** + * ubi_register_volume_notifier - register a volume notifier. + * @nb: the notifier description object + * @ignore_existing: if non-zero, do not send "added" notification for all + * already existing volumes + * + * This function registers a volume notifier, which means that + * 'nb->notifier_call()' will be invoked when an UBI volume is created, + * removed, re-sized, re-named, or updated. The first argument of the function + * is the notification type. The second argument is pointer to a + * &struct ubi_notification object which describes the notification event. + * Using UBI API from the volume notifier is prohibited. + * + * This function returns zero in case of success and a negative error code + * in case of failure. + */ +int ubi_register_volume_notifier(struct notifier_block *nb, + int ignore_existing) +{ + int err; + + err = blocking_notifier_chain_register(&ubi_notifiers, nb); + if (err != 0) + return err; + if (ignore_existing) + return 0; + + /* + * We are going to walk all UBI devices and all volumes, and + * notify the user about existing volumes by the %UBI_VOLUME_ADDED + * event. We have to lock the @ubi_devices_mutex to make sure UBI + * devices do not disappear. + */ + mutex_lock(&ubi_devices_mutex); + ubi_enumerate_volumes(nb); + mutex_unlock(&ubi_devices_mutex); + + return err; +} +EXPORT_SYMBOL_GPL(ubi_register_volume_notifier); + +/** + * ubi_unregister_volume_notifier - unregister the volume notifier. + * @nb: the notifier description object + * + * This function unregisters volume notifier @nm and returns zero in case of + * success and a negative error code in case of failure. + */ +int ubi_unregister_volume_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&ubi_notifiers, nb); +} +EXPORT_SYMBOL_GPL(ubi_unregister_volume_notifier); +#endif diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c index 5ff55b4..49530b7 100644 --- a/drivers/mtd/ubi/misc.c +++ b/drivers/mtd/ubi/misc.c @@ -81,14 +81,62 @@ int ubi_check_volume(struct ubi_device *ubi, int vol_id) } /** - * ubi_calculate_rsvd_pool - calculate how many PEBs must be reserved for bad + * ubi_update_reserved - update bad eraseblock handling accounting data. + * @ubi: UBI device description object + * + * This function calculates the gap between current number of PEBs reserved for + * bad eraseblock handling and the required level of PEBs that must be + * reserved, and if necessary, reserves more PEBs to fill that gap, according + * to availability. Should be called with ubi->volumes_lock held. + */ +void ubi_update_reserved(struct ubi_device *ubi) +{ + int need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; + + if (need <= 0 || ubi->avail_pebs == 0) + return; + + need = min_t(int, need, ubi->avail_pebs); + ubi->avail_pebs -= need; + ubi->rsvd_pebs += need; + ubi->beb_rsvd_pebs += need; + ubi_msg("reserved more %d PEBs for bad PEB handling", need); +} + +/** + * ubi_calculate_reserved - calculate how many PEBs must be reserved for bad * eraseblock handling. * @ubi: UBI device description object */ void ubi_calculate_reserved(struct ubi_device *ubi) { - ubi->beb_rsvd_level = ubi->good_peb_count/100; - ubi->beb_rsvd_level *= CONFIG_MTD_UBI_BEB_RESERVE; - if (ubi->beb_rsvd_level < MIN_RESEVED_PEBS) - ubi->beb_rsvd_level = MIN_RESEVED_PEBS; + /* + * Calculate the actual number of PEBs currently needed to be reserved + * for future bad eraseblock handling. + */ + ubi->beb_rsvd_level = ubi->bad_peb_limit - ubi->bad_peb_count; + if (ubi->beb_rsvd_level < 0) { + ubi->beb_rsvd_level = 0; + ubi_warn("number of bad PEBs (%d) is above the expected limit (%d), not reserving any PEBs for bad PEB handling, will use available PEBs (if any)", + ubi->bad_peb_count, ubi->bad_peb_limit); + } +} + +/** + * ubi_check_pattern - check if buffer contains only a certain byte pattern. + * @buf: buffer to check + * @patt: the pattern to check + * @size: buffer size in bytes + * + * This function returns %1 in there are only @patt bytes in @buf, and %0 if + * something else was also found. + */ +int ubi_check_pattern(const void *buf, uint8_t patt, int size) +{ + int i; + + for (i = 0; i < size; i++) + if (((const uint8_t *)buf)[i] != patt) + return 0; + return 1; } diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c deleted file mode 100644 index a6d0fbc..0000000 --- a/drivers/mtd/ubi/scan.c +++ /dev/null @@ -1,1348 +0,0 @@ -/* - * Copyright (c) International Business Machines Corp., 2006 - * - * SPDX-License-Identifier: GPL-2.0+ - * - * Author: Artem Bityutskiy (Битюцкий Артём) - */ - -/* - * UBI scanning unit. - * - * This unit is responsible for scanning the flash media, checking UBI - * headers and providing complete information about the UBI flash image. - * - * The scanning information is represented by a &struct ubi_scan_info' object. - * Information about found volumes is represented by &struct ubi_scan_volume - * objects which are kept in volume RB-tree with root at the @volumes field. - * The RB-tree is indexed by the volume ID. - * - * Found logical eraseblocks are represented by &struct ubi_scan_leb objects. - * These objects are kept in per-volume RB-trees with the root at the - * corresponding &struct ubi_scan_volume object. To put it differently, we keep - * an RB-tree of per-volume objects and each of these objects is the root of - * RB-tree of per-eraseblock objects. - * - * Corrupted physical eraseblocks are put to the @corr list, free physical - * eraseblocks are put to the @free list and the physical eraseblock to be - * erased are put to the @erase list. - */ - -#ifdef UBI_LINUX -#include <linux/err.h> -#include <linux/crc32.h> -#include <asm/div64.h> -#endif - -#include <ubi_uboot.h> -#include "ubi.h" - -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID -static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si); -#else -#define paranoid_check_si(ubi, si) 0 -#endif - -/* Temporary variables used during scanning */ -static struct ubi_ec_hdr *ech; -static struct ubi_vid_hdr *vidh; - -/** - * add_to_list - add physical eraseblock to a list. - * @si: scanning information - * @pnum: physical eraseblock number to add - * @ec: erase counter of the physical eraseblock - * @list: the list to add to - * - * This function adds physical eraseblock @pnum to free, erase, corrupted or - * alien lists. Returns zero in case of success and a negative error code in - * case of failure. - */ -static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, - struct list_head *list) -{ - struct ubi_scan_leb *seb; - - if (list == &si->free) - dbg_bld("add to free: PEB %d, EC %d", pnum, ec); - else if (list == &si->erase) - dbg_bld("add to erase: PEB %d, EC %d", pnum, ec); - else if (list == &si->corr) - dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); - else if (list == &si->alien) - dbg_bld("add to alien: PEB %d, EC %d", pnum, ec); - else - BUG(); - - seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); - if (!seb) - return -ENOMEM; - - seb->pnum = pnum; - seb->ec = ec; - list_add_tail(&seb->u.list, list); - return 0; -} - -/** - * validate_vid_hdr - check that volume identifier header is correct and - * consistent. - * @vid_hdr: the volume identifier header to check - * @sv: information about the volume this logical eraseblock belongs to - * @pnum: physical eraseblock number the VID header came from - * - * This function checks that data stored in @vid_hdr is consistent. Returns - * non-zero if an inconsistency was found and zero if not. - * - * Note, UBI does sanity check of everything it reads from the flash media. - * Most of the checks are done in the I/O unit. Here we check that the - * information in the VID header is consistent to the information in other VID - * headers of the same volume. - */ -static int validate_vid_hdr(const struct ubi_vid_hdr *vid_hdr, - const struct ubi_scan_volume *sv, int pnum) -{ - int vol_type = vid_hdr->vol_type; - int vol_id = be32_to_cpu(vid_hdr->vol_id); - int used_ebs = be32_to_cpu(vid_hdr->used_ebs); - int data_pad = be32_to_cpu(vid_hdr->data_pad); - - if (sv->leb_count != 0) { - int sv_vol_type; - - /* - * This is not the first logical eraseblock belonging to this - * volume. Ensure that the data in its VID header is consistent - * to the data in previous logical eraseblock headers. - */ - - if (vol_id != sv->vol_id) { - dbg_err("inconsistent vol_id"); - goto bad; - } - - if (sv->vol_type == UBI_STATIC_VOLUME) - sv_vol_type = UBI_VID_STATIC; - else - sv_vol_type = UBI_VID_DYNAMIC; - - if (vol_type != sv_vol_type) { - dbg_err("inconsistent vol_type"); - goto bad; - } - - if (used_ebs != sv->used_ebs) { - dbg_err("inconsistent used_ebs"); - goto bad; - } - - if (data_pad != sv->data_pad) { - dbg_err("inconsistent data_pad"); - goto bad; - } - } - - return 0; - -bad: - ubi_err("inconsistent VID header at PEB %d", pnum); - ubi_dbg_dump_vid_hdr(vid_hdr); - ubi_dbg_dump_sv(sv); - return -EINVAL; -} - -/** - * add_volume - add volume to the scanning information. - * @si: scanning information - * @vol_id: ID of the volume to add - * @pnum: physical eraseblock number - * @vid_hdr: volume identifier header - * - * If the volume corresponding to the @vid_hdr logical eraseblock is already - * present in the scanning information, this function does nothing. Otherwise - * it adds corresponding volume to the scanning information. Returns a pointer - * to the scanning volume object in case of success and a negative error code - * in case of failure. - */ -static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id, - int pnum, - const struct ubi_vid_hdr *vid_hdr) -{ - struct ubi_scan_volume *sv; - struct rb_node **p = &si->volumes.rb_node, *parent = NULL; - - ubi_assert(vol_id == be32_to_cpu(vid_hdr->vol_id)); - - /* Walk the volume RB-tree to look if this volume is already present */ - while (*p) { - parent = *p; - sv = rb_entry(parent, struct ubi_scan_volume, rb); - - if (vol_id == sv->vol_id) - return sv; - - if (vol_id > sv->vol_id) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - /* The volume is absent - add it */ - sv = kmalloc(sizeof(struct ubi_scan_volume), GFP_KERNEL); - if (!sv) - return ERR_PTR(-ENOMEM); - - sv->highest_lnum = sv->leb_count = 0; - sv->vol_id = vol_id; - sv->root = RB_ROOT; - sv->used_ebs = be32_to_cpu(vid_hdr->used_ebs); - sv->data_pad = be32_to_cpu(vid_hdr->data_pad); - sv->compat = vid_hdr->compat; - sv->vol_type = vid_hdr->vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME - : UBI_STATIC_VOLUME; - if (vol_id > si->highest_vol_id) - si->highest_vol_id = vol_id; - - rb_link_node(&sv->rb, parent, p); - rb_insert_color(&sv->rb, &si->volumes); - si->vols_found += 1; - dbg_bld("added volume %d", vol_id); - return sv; -} - -/** - * compare_lebs - find out which logical eraseblock is newer. - * @ubi: UBI device description object - * @seb: first logical eraseblock to compare - * @pnum: physical eraseblock number of the second logical eraseblock to - * compare - * @vid_hdr: volume identifier header of the second logical eraseblock - * - * This function compares 2 copies of a LEB and informs which one is newer. In - * case of success this function returns a positive value, in case of failure, a - * negative error code is returned. The success return codes use the following - * bits: - * o bit 0 is cleared: the first PEB (described by @seb) is newer then the - * second PEB (described by @pnum and @vid_hdr); - * o bit 0 is set: the second PEB is newer; - * o bit 1 is cleared: no bit-flips were detected in the newer LEB; - * o bit 1 is set: bit-flips were detected in the newer LEB; - * o bit 2 is cleared: the older LEB is not corrupted; - * o bit 2 is set: the older LEB is corrupted. - */ -static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, - int pnum, const struct ubi_vid_hdr *vid_hdr) -{ - void *buf; - int len, err, second_is_newer, bitflips = 0, corrupted = 0; - uint32_t data_crc, crc; - struct ubi_vid_hdr *vh = NULL; - unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum); - - if (seb->sqnum == 0 && sqnum2 == 0) { - long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver); - - /* - * UBI constantly increases the logical eraseblock version - * number and it can overflow. Thus, we have to bear in mind - * that versions that are close to %0xFFFFFFFF are less then - * versions that are close to %0. - * - * The UBI WL unit guarantees that the number of pending tasks - * is not greater then %0x7FFFFFFF. So, if the difference - * between any two versions is greater or equivalent to - * %0x7FFFFFFF, there was an overflow and the logical - * eraseblock with lower version is actually newer then the one - * with higher version. - * - * FIXME: but this is anyway obsolete and will be removed at - * some point. - */ - dbg_bld("using old crappy leb_ver stuff"); - - if (v1 == v2) { - ubi_err("PEB %d and PEB %d have the same version %lld", - seb->pnum, pnum, v1); - return -EINVAL; - } - - abs = v1 - v2; - if (abs < 0) - abs = -abs; - - if (abs < 0x7FFFFFFF) - /* Non-overflow situation */ - second_is_newer = (v2 > v1); - else - second_is_newer = (v2 < v1); - } else - /* Obviously the LEB with lower sequence counter is older */ - second_is_newer = sqnum2 > seb->sqnum; - - /* - * Now we know which copy is newer. If the copy flag of the PEB with - * newer version is not set, then we just return, otherwise we have to - * check data CRC. For the second PEB we already have the VID header, - * for the first one - we'll need to re-read it from flash. - * - * FIXME: this may be optimized so that we wouldn't read twice. - */ - - if (second_is_newer) { - if (!vid_hdr->copy_flag) { - /* It is not a copy, so it is newer */ - dbg_bld("second PEB %d is newer, copy_flag is unset", - pnum); - return 1; - } - } else { - pnum = seb->pnum; - - vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); - if (!vh) - return -ENOMEM; - - err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0); - if (err) { - if (err == UBI_IO_BITFLIPS) - bitflips = 1; - else { - dbg_err("VID of PEB %d header is bad, but it " - "was OK earlier", pnum); - if (err > 0) - err = -EIO; - - goto out_free_vidh; - } - } - - if (!vh->copy_flag) { - /* It is not a copy, so it is newer */ - dbg_bld("first PEB %d is newer, copy_flag is unset", - pnum); - err = bitflips << 1; - goto out_free_vidh; - } - - vid_hdr = vh; - } - - /* Read the data of the copy and check the CRC */ - - len = be32_to_cpu(vid_hdr->data_size); - buf = vmalloc(len); - if (!buf) { - err = -ENOMEM; - goto out_free_vidh; - } - - err = ubi_io_read_data(ubi, buf, pnum, 0, len); - if (err && err != UBI_IO_BITFLIPS) - goto out_free_buf; - - data_crc = be32_to_cpu(vid_hdr->data_crc); - crc = crc32(UBI_CRC32_INIT, buf, len); - if (crc != data_crc) { - dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x", - pnum, crc, data_crc); - corrupted = 1; - bitflips = 0; - second_is_newer = !second_is_newer; - } else { - dbg_bld("PEB %d CRC is OK", pnum); - bitflips = !!err; - } - - vfree(buf); - ubi_free_vid_hdr(ubi, vh); - - if (second_is_newer) - dbg_bld("second PEB %d is newer, copy_flag is set", pnum); - else - dbg_bld("first PEB %d is newer, copy_flag is set", pnum); - - return second_is_newer | (bitflips << 1) | (corrupted << 2); - -out_free_buf: - vfree(buf); -out_free_vidh: - ubi_free_vid_hdr(ubi, vh); - return err; -} - -/** - * ubi_scan_add_used - add information about a physical eraseblock to the - * scanning information. - * @ubi: UBI device description object - * @si: scanning information - * @pnum: the physical eraseblock number - * @ec: erase counter - * @vid_hdr: the volume identifier header - * @bitflips: if bit-flips were detected when this physical eraseblock was read - * - * This function adds information about a used physical eraseblock to the - * 'used' tree of the corresponding volume. The function is rather complex - * because it has to handle cases when this is not the first physical - * eraseblock belonging to the same logical eraseblock, and the newer one has - * to be picked, while the older one has to be dropped. This function returns - * zero in case of success and a negative error code in case of failure. - */ -int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, - int pnum, int ec, const struct ubi_vid_hdr *vid_hdr, - int bitflips) -{ - int err, vol_id, lnum; - uint32_t leb_ver; - unsigned long long sqnum; - struct ubi_scan_volume *sv; - struct ubi_scan_leb *seb; - struct rb_node **p, *parent = NULL; - - vol_id = be32_to_cpu(vid_hdr->vol_id); - lnum = be32_to_cpu(vid_hdr->lnum); - sqnum = be64_to_cpu(vid_hdr->sqnum); - leb_ver = be32_to_cpu(vid_hdr->leb_ver); - - dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d", - pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips); - - sv = add_volume(si, vol_id, pnum, vid_hdr); - if (IS_ERR(sv) < 0) - return PTR_ERR(sv); - - if (si->max_sqnum < sqnum) - si->max_sqnum = sqnum; - - /* - * Walk the RB-tree of logical eraseblocks of volume @vol_id to look - * if this is the first instance of this logical eraseblock or not. - */ - p = &sv->root.rb_node; - while (*p) { - int cmp_res; - - parent = *p; - seb = rb_entry(parent, struct ubi_scan_leb, u.rb); - if (lnum != seb->lnum) { - if (lnum < seb->lnum) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - continue; - } - - /* - * There is already a physical eraseblock describing the same - * logical eraseblock present. - */ - - dbg_bld("this LEB already exists: PEB %d, sqnum %llu, " - "LEB ver %u, EC %d", seb->pnum, seb->sqnum, - seb->leb_ver, seb->ec); - - /* - * Make sure that the logical eraseblocks have different - * versions. Otherwise the image is bad. - */ - if (seb->leb_ver == leb_ver && leb_ver != 0) { - ubi_err("two LEBs with same version %u", leb_ver); - ubi_dbg_dump_seb(seb, 0); - ubi_dbg_dump_vid_hdr(vid_hdr); - return -EINVAL; - } - - /* - * Make sure that the logical eraseblocks have different - * sequence numbers. Otherwise the image is bad. - * - * FIXME: remove 'sqnum != 0' check when leb_ver is removed. - */ - if (seb->sqnum == sqnum && sqnum != 0) { - ubi_err("two LEBs with same sequence number %llu", - sqnum); - ubi_dbg_dump_seb(seb, 0); - ubi_dbg_dump_vid_hdr(vid_hdr); - return -EINVAL; - } - - /* - * Now we have to drop the older one and preserve the newer - * one. - */ - cmp_res = compare_lebs(ubi, seb, pnum, vid_hdr); - if (cmp_res < 0) - return cmp_res; - - if (cmp_res & 1) { - /* - * This logical eraseblock is newer then the one - * found earlier. - */ - err = validate_vid_hdr(vid_hdr, sv, pnum); - if (err) - return err; - - if (cmp_res & 4) - err = add_to_list(si, seb->pnum, seb->ec, - &si->corr); - else - err = add_to_list(si, seb->pnum, seb->ec, - &si->erase); - if (err) - return err; - - seb->ec = ec; - seb->pnum = pnum; - seb->scrub = ((cmp_res & 2) || bitflips); - seb->sqnum = sqnum; - seb->leb_ver = leb_ver; - - if (sv->highest_lnum == lnum) - sv->last_data_size = - be32_to_cpu(vid_hdr->data_size); - - return 0; - } else { - /* - * This logical eraseblock is older then the one found - * previously. - */ - if (cmp_res & 4) - return add_to_list(si, pnum, ec, &si->corr); - else - return add_to_list(si, pnum, ec, &si->erase); - } - } - - /* - * We've met this logical eraseblock for the first time, add it to the - * scanning information. - */ - - err = validate_vid_hdr(vid_hdr, sv, pnum); - if (err) - return err; - - seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); - if (!seb) - return -ENOMEM; - - seb->ec = ec; - seb->pnum = pnum; - seb->lnum = lnum; - seb->sqnum = sqnum; - seb->scrub = bitflips; - seb->leb_ver = leb_ver; - - if (sv->highest_lnum <= lnum) { - sv->highest_lnum = lnum; - sv->last_data_size = be32_to_cpu(vid_hdr->data_size); - } - - sv->leb_count += 1; - rb_link_node(&seb->u.rb, parent, p); - rb_insert_color(&seb->u.rb, &sv->root); - return 0; -} - -/** - * ubi_scan_find_sv - find information about a particular volume in the - * scanning information. - * @si: scanning information - * @vol_id: the requested volume ID - * - * This function returns a pointer to the volume description or %NULL if there - * are no data about this volume in the scanning information. - */ -struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si, - int vol_id) -{ - struct ubi_scan_volume *sv; - struct rb_node *p = si->volumes.rb_node; - - while (p) { - sv = rb_entry(p, struct ubi_scan_volume, rb); - - if (vol_id == sv->vol_id) - return sv; - - if (vol_id > sv->vol_id) - p = p->rb_left; - else - p = p->rb_right; - } - - return NULL; -} - -/** - * ubi_scan_find_seb - find information about a particular logical - * eraseblock in the volume scanning information. - * @sv: a pointer to the volume scanning information - * @lnum: the requested logical eraseblock - * - * This function returns a pointer to the scanning logical eraseblock or %NULL - * if there are no data about it in the scanning volume information. - */ -struct ubi_scan_leb *ubi_scan_find_seb(const struct ubi_scan_volume *sv, - int lnum) -{ - struct ubi_scan_leb *seb; - struct rb_node *p = sv->root.rb_node; - - while (p) { - seb = rb_entry(p, struct ubi_scan_leb, u.rb); - - if (lnum == seb->lnum) - return seb; - - if (lnum > seb->lnum) - p = p->rb_left; - else - p = p->rb_right; - } - - return NULL; -} - -/** - * ubi_scan_rm_volume - delete scanning information about a volume. - * @si: scanning information - * @sv: the volume scanning information to delete - */ -void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv) -{ - struct rb_node *rb; - struct ubi_scan_leb *seb; - - dbg_bld("remove scanning information about volume %d", sv->vol_id); - - while ((rb = rb_first(&sv->root))) { - seb = rb_entry(rb, struct ubi_scan_leb, u.rb); - rb_erase(&seb->u.rb, &sv->root); - list_add_tail(&seb->u.list, &si->erase); - } - - rb_erase(&sv->rb, &si->volumes); - kfree(sv); - si->vols_found -= 1; -} - -/** - * ubi_scan_erase_peb - erase a physical eraseblock. - * @ubi: UBI device description object - * @si: scanning information - * @pnum: physical eraseblock number to erase; - * @ec: erase counter value to write (%UBI_SCAN_UNKNOWN_EC if it is unknown) - * - * This function erases physical eraseblock 'pnum', and writes the erase - * counter header to it. This function should only be used on UBI device - * initialization stages, when the EBA unit had not been yet initialized. This - * function returns zero in case of success and a negative error code in case - * of failure. - */ -int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si, - int pnum, int ec) -{ - int err; - struct ubi_ec_hdr *ec_hdr; - - if ((long long)ec >= UBI_MAX_ERASECOUNTER) { - /* - * Erase counter overflow. Upgrade UBI and use 64-bit - * erase counters internally. - */ - ubi_err("erase counter overflow at PEB %d, EC %d", pnum, ec); - return -EINVAL; - } - - ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); - if (!ec_hdr) - return -ENOMEM; - - ec_hdr->ec = cpu_to_be64(ec); - - err = ubi_io_sync_erase(ubi, pnum, 0); - if (err < 0) - goto out_free; - - err = ubi_io_write_ec_hdr(ubi, pnum, ec_hdr); - -out_free: - kfree(ec_hdr); - return err; -} - -/** - * ubi_scan_get_free_peb - get a free physical eraseblock. - * @ubi: UBI device description object - * @si: scanning information - * - * This function returns a free physical eraseblock. It is supposed to be - * called on the UBI initialization stages when the wear-leveling unit is not - * initialized yet. This function picks a physical eraseblocks from one of the - * lists, writes the EC header if it is needed, and removes it from the list. - * - * This function returns scanning physical eraseblock information in case of - * success and an error code in case of failure. - */ -struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, - struct ubi_scan_info *si) -{ - int err = 0, i; - struct ubi_scan_leb *seb; - - if (!list_empty(&si->free)) { - seb = list_entry(si->free.next, struct ubi_scan_leb, u.list); - list_del(&seb->u.list); - dbg_bld("return free PEB %d, EC %d", seb->pnum, seb->ec); - return seb; - } - - for (i = 0; i < 2; i++) { - struct list_head *head; - struct ubi_scan_leb *tmp_seb; - - if (i == 0) - head = &si->erase; - else - head = &si->corr; - - /* - * We try to erase the first physical eraseblock from the @head - * list and pick it if we succeed, or try to erase the - * next one if not. And so forth. We don't want to take care - * about bad eraseblocks here - they'll be handled later. - */ - list_for_each_entry_safe(seb, tmp_seb, head, u.list) { - if (seb->ec == UBI_SCAN_UNKNOWN_EC) - seb->ec = si->mean_ec; - - err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1); - if (err) - continue; - - seb->ec += 1; - list_del(&seb->u.list); - dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec); - return seb; - } - } - - ubi_err("no eraseblocks found"); - return ERR_PTR(-ENOSPC); -} - -/** - * process_eb - read UBI headers, check them and add corresponding data - * to the scanning information. - * @ubi: UBI device description object - * @si: scanning information - * @pnum: the physical eraseblock number - * - * This function returns a zero if the physical eraseblock was successfully - * handled and a negative error code in case of failure. - */ -static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum) -{ - long long uninitialized_var(ec); - int err, bitflips = 0, vol_id, ec_corr = 0; - - dbg_bld("scan PEB %d", pnum); - - /* Skip bad physical eraseblocks */ - err = ubi_io_is_bad(ubi, pnum); - if (err < 0) - return err; - else if (err) { - /* - * FIXME: this is actually duty of the I/O unit to initialize - * this, but MTD does not provide enough information. - */ - si->bad_peb_count += 1; - return 0; - } - - err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); - if (err < 0) - return err; - else if (err == UBI_IO_BITFLIPS) - bitflips = 1; - else if (err == UBI_IO_PEB_EMPTY) - return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); - else if (err == UBI_IO_BAD_EC_HDR) { - /* - * We have to also look at the VID header, possibly it is not - * corrupted. Set %bitflips flag in order to make this PEB be - * moved and EC be re-created. - */ - ec_corr = 1; - ec = UBI_SCAN_UNKNOWN_EC; - bitflips = 1; - } - - si->is_empty = 0; - - if (!ec_corr) { - /* Make sure UBI version is OK */ - if (ech->version != UBI_VERSION) { - ubi_err("this UBI version is %d, image version is %d", - UBI_VERSION, (int)ech->version); - return -EINVAL; - } - - ec = be64_to_cpu(ech->ec); - if (ec > UBI_MAX_ERASECOUNTER) { - /* - * Erase counter overflow. The EC headers have 64 bits - * reserved, but we anyway make use of only 31 bit - * values, as this seems to be enough for any existing - * flash. Upgrade UBI and use 64-bit erase counters - * internally. - */ - ubi_err("erase counter overflow, max is %d", - UBI_MAX_ERASECOUNTER); - ubi_dbg_dump_ec_hdr(ech); - return -EINVAL; - } - } - - /* OK, we've done with the EC header, let's look at the VID header */ - - err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0); - if (err < 0) - return err; - else if (err == UBI_IO_BITFLIPS) - bitflips = 1; - else if (err == UBI_IO_BAD_VID_HDR || - (err == UBI_IO_PEB_FREE && ec_corr)) { - /* VID header is corrupted */ - err = add_to_list(si, pnum, ec, &si->corr); - if (err) - return err; - goto adjust_mean_ec; - } else if (err == UBI_IO_PEB_FREE) { - /* No VID header - the physical eraseblock is free */ - err = add_to_list(si, pnum, ec, &si->free); - if (err) - return err; - goto adjust_mean_ec; - } - - vol_id = be32_to_cpu(vidh->vol_id); - if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) { - int lnum = be32_to_cpu(vidh->lnum); - - /* Unsupported internal volume */ - switch (vidh->compat) { - case UBI_COMPAT_DELETE: - ubi_msg("\"delete\" compatible internal volume %d:%d" - " found, remove it", vol_id, lnum); - err = add_to_list(si, pnum, ec, &si->corr); - if (err) - return err; - break; - - case UBI_COMPAT_RO: - ubi_msg("read-only compatible internal volume %d:%d" - " found, switch to read-only mode", - vol_id, lnum); - ubi->ro_mode = 1; - break; - - case UBI_COMPAT_PRESERVE: - ubi_msg("\"preserve\" compatible internal volume %d:%d" - " found", vol_id, lnum); - err = add_to_list(si, pnum, ec, &si->alien); - if (err) - return err; - si->alien_peb_count += 1; - return 0; - - case UBI_COMPAT_REJECT: - ubi_err("incompatible internal volume %d:%d found", - vol_id, lnum); - return -EINVAL; - } - } - - /* Both UBI headers seem to be fine */ - err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips); - if (err) - return err; - -adjust_mean_ec: - if (!ec_corr) { - si->ec_sum += ec; - si->ec_count += 1; - if (ec > si->max_ec) - si->max_ec = ec; - if (ec < si->min_ec) - si->min_ec = ec; - } - - return 0; -} - -/** - * ubi_scan - scan an MTD device. - * @ubi: UBI device description object - * - * This function does full scanning of an MTD device and returns complete - * information about it. In case of failure, an error code is returned. - */ -struct ubi_scan_info *ubi_scan(struct ubi_device *ubi) -{ - int err, pnum; - struct rb_node *rb1, *rb2; - struct ubi_scan_volume *sv; - struct ubi_scan_leb *seb; - struct ubi_scan_info *si; - - si = kzalloc(sizeof(struct ubi_scan_info), GFP_KERNEL); - if (!si) - return ERR_PTR(-ENOMEM); - - INIT_LIST_HEAD(&si->corr); - INIT_LIST_HEAD(&si->free); - INIT_LIST_HEAD(&si->erase); - INIT_LIST_HEAD(&si->alien); - si->volumes = RB_ROOT; - si->is_empty = 1; - - err = -ENOMEM; - ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); - if (!ech) - goto out_si; - - vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); - if (!vidh) - goto out_ech; - - for (pnum = 0; pnum < ubi->peb_count; pnum++) { - cond_resched(); - - dbg_msg("process PEB %d", pnum); - err = process_eb(ubi, si, pnum); - if (err < 0) - goto out_vidh; - } - - dbg_msg("scanning is finished"); - - /* Calculate mean erase counter */ - if (si->ec_count) { - do_div(si->ec_sum, si->ec_count); - si->mean_ec = si->ec_sum; - } - - if (si->is_empty) - ubi_msg("empty MTD device detected"); - - /* - * In case of unknown erase counter we use the mean erase counter - * value. - */ - ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { - ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) - if (seb->ec == UBI_SCAN_UNKNOWN_EC) - seb->ec = si->mean_ec; - } - - list_for_each_entry(seb, &si->free, u.list) { - if (seb->ec == UBI_SCAN_UNKNOWN_EC) - seb->ec = si->mean_ec; - } - - list_for_each_entry(seb, &si->corr, u.list) - if (seb->ec == UBI_SCAN_UNKNOWN_EC) - seb->ec = si->mean_ec; - - list_for_each_entry(seb, &si->erase, u.list) - if (seb->ec == UBI_SCAN_UNKNOWN_EC) - seb->ec = si->mean_ec; - - err = paranoid_check_si(ubi, si); - if (err) { - if (err > 0) - err = -EINVAL; - goto out_vidh; - } - - ubi_free_vid_hdr(ubi, vidh); - kfree(ech); - - return si; - -out_vidh: - ubi_free_vid_hdr(ubi, vidh); -out_ech: - kfree(ech); -out_si: - ubi_scan_destroy_si(si); - return ERR_PTR(err); -} - -/** - * destroy_sv - free the scanning volume information - * @sv: scanning volume information - * - * This function destroys the volume RB-tree (@sv->root) and the scanning - * volume information. - */ -static void destroy_sv(struct ubi_scan_volume *sv) -{ - struct ubi_scan_leb *seb; - struct rb_node *this = sv->root.rb_node; - - while (this) { - if (this->rb_left) - this = this->rb_left; - else if (this->rb_right) - this = this->rb_right; - else { - seb = rb_entry(this, struct ubi_scan_leb, u.rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &seb->u.rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } - - kfree(seb); - } - } - kfree(sv); -} - -/** - * ubi_scan_destroy_si - destroy scanning information. - * @si: scanning information - */ -void ubi_scan_destroy_si(struct ubi_scan_info *si) -{ - struct ubi_scan_leb *seb, *seb_tmp; - struct ubi_scan_volume *sv; - struct rb_node *rb; - - list_for_each_entry_safe(seb, seb_tmp, &si->alien, u.list) { - list_del(&seb->u.list); - kfree(seb); - } - list_for_each_entry_safe(seb, seb_tmp, &si->erase, u.list) { - list_del(&seb->u.list); - kfree(seb); - } - list_for_each_entry_safe(seb, seb_tmp, &si->corr, u.list) { - list_del(&seb->u.list); - kfree(seb); - } - list_for_each_entry_safe(seb, seb_tmp, &si->free, u.list) { - list_del(&seb->u.list); - kfree(seb); - } - - /* Destroy the volume RB-tree */ - rb = si->volumes.rb_node; - while (rb) { - if (rb->rb_left) - rb = rb->rb_left; - else if (rb->rb_right) - rb = rb->rb_right; - else { - sv = rb_entry(rb, struct ubi_scan_volume, rb); - - rb = rb_parent(rb); - if (rb) { - if (rb->rb_left == &sv->rb) - rb->rb_left = NULL; - else - rb->rb_right = NULL; - } - - destroy_sv(sv); - } - } - - kfree(si); -} - -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID - -/** - * paranoid_check_si - check if the scanning information is correct and - * consistent. - * @ubi: UBI device description object - * @si: scanning information - * - * This function returns zero if the scanning information is all right, %1 if - * not and a negative error code if an error occurred. - */ -static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si) -{ - int pnum, err, vols_found = 0; - struct rb_node *rb1, *rb2; - struct ubi_scan_volume *sv; - struct ubi_scan_leb *seb, *last_seb; - uint8_t *buf; - - /* - * At first, check that scanning information is OK. - */ - ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { - int leb_count = 0; - - cond_resched(); - - vols_found += 1; - - if (si->is_empty) { - ubi_err("bad is_empty flag"); - goto bad_sv; - } - - if (sv->vol_id < 0 || sv->highest_lnum < 0 || - sv->leb_count < 0 || sv->vol_type < 0 || sv->used_ebs < 0 || - sv->data_pad < 0 || sv->last_data_size < 0) { - ubi_err("negative values"); - goto bad_sv; - } - - if (sv->vol_id >= UBI_MAX_VOLUMES && - sv->vol_id < UBI_INTERNAL_VOL_START) { - ubi_err("bad vol_id"); - goto bad_sv; - } - - if (sv->vol_id > si->highest_vol_id) { - ubi_err("highest_vol_id is %d, but vol_id %d is there", - si->highest_vol_id, sv->vol_id); - goto out; - } - - if (sv->vol_type != UBI_DYNAMIC_VOLUME && - sv->vol_type != UBI_STATIC_VOLUME) { - ubi_err("bad vol_type"); - goto bad_sv; - } - - if (sv->data_pad > ubi->leb_size / 2) { - ubi_err("bad data_pad"); - goto bad_sv; - } - - last_seb = NULL; - ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { - cond_resched(); - - last_seb = seb; - leb_count += 1; - - if (seb->pnum < 0 || seb->ec < 0) { - ubi_err("negative values"); - goto bad_seb; - } - - if (seb->ec < si->min_ec) { - ubi_err("bad si->min_ec (%d), %d found", - si->min_ec, seb->ec); - goto bad_seb; - } - - if (seb->ec > si->max_ec) { - ubi_err("bad si->max_ec (%d), %d found", - si->max_ec, seb->ec); - goto bad_seb; - } - - if (seb->pnum >= ubi->peb_count) { - ubi_err("too high PEB number %d, total PEBs %d", - seb->pnum, ubi->peb_count); - goto bad_seb; - } - - if (sv->vol_type == UBI_STATIC_VOLUME) { - if (seb->lnum >= sv->used_ebs) { - ubi_err("bad lnum or used_ebs"); - goto bad_seb; - } - } else { - if (sv->used_ebs != 0) { - ubi_err("non-zero used_ebs"); - goto bad_seb; - } - } - - if (seb->lnum > sv->highest_lnum) { - ubi_err("incorrect highest_lnum or lnum"); - goto bad_seb; - } - } - - if (sv->leb_count != leb_count) { - ubi_err("bad leb_count, %d objects in the tree", - leb_count); - goto bad_sv; - } - - if (!last_seb) - continue; - - seb = last_seb; - - if (seb->lnum != sv->highest_lnum) { - ubi_err("bad highest_lnum"); - goto bad_seb; - } - } - - if (vols_found != si->vols_found) { - ubi_err("bad si->vols_found %d, should be %d", - si->vols_found, vols_found); - goto out; - } - - /* Check that scanning information is correct */ - ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { - last_seb = NULL; - ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { - int vol_type; - - cond_resched(); - - last_seb = seb; - - err = ubi_io_read_vid_hdr(ubi, seb->pnum, vidh, 1); - if (err && err != UBI_IO_BITFLIPS) { - ubi_err("VID header is not OK (%d)", err); - if (err > 0) - err = -EIO; - return err; - } - - vol_type = vidh->vol_type == UBI_VID_DYNAMIC ? - UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; - if (sv->vol_type != vol_type) { - ubi_err("bad vol_type"); - goto bad_vid_hdr; - } - - if (seb->sqnum != be64_to_cpu(vidh->sqnum)) { - ubi_err("bad sqnum %llu", seb->sqnum); - goto bad_vid_hdr; - } - - if (sv->vol_id != be32_to_cpu(vidh->vol_id)) { - ubi_err("bad vol_id %d", sv->vol_id); - goto bad_vid_hdr; - } - - if (sv->compat != vidh->compat) { - ubi_err("bad compat %d", vidh->compat); - goto bad_vid_hdr; - } - - if (seb->lnum != be32_to_cpu(vidh->lnum)) { - ubi_err("bad lnum %d", seb->lnum); - goto bad_vid_hdr; - } - - if (sv->used_ebs != be32_to_cpu(vidh->used_ebs)) { - ubi_err("bad used_ebs %d", sv->used_ebs); - goto bad_vid_hdr; - } - - if (sv->data_pad != be32_to_cpu(vidh->data_pad)) { - ubi_err("bad data_pad %d", sv->data_pad); - goto bad_vid_hdr; - } - - if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) { - ubi_err("bad leb_ver %u", seb->leb_ver); - goto bad_vid_hdr; - } - } - - if (!last_seb) - continue; - - if (sv->highest_lnum != be32_to_cpu(vidh->lnum)) { - ubi_err("bad highest_lnum %d", sv->highest_lnum); - goto bad_vid_hdr; - } - - if (sv->last_data_size != be32_to_cpu(vidh->data_size)) { - ubi_err("bad last_data_size %d", sv->last_data_size); - goto bad_vid_hdr; - } - } - - /* - * Make sure that all the physical eraseblocks are in one of the lists - * or trees. - */ - buf = kzalloc(ubi->peb_count, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - for (pnum = 0; pnum < ubi->peb_count; pnum++) { - err = ubi_io_is_bad(ubi, pnum); - if (err < 0) { - kfree(buf); - return err; - } - else if (err) - buf[pnum] = 1; - } - - ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) - ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) - buf[seb->pnum] = 1; - - list_for_each_entry(seb, &si->free, u.list) - buf[seb->pnum] = 1; - - list_for_each_entry(seb, &si->corr, u.list) - buf[seb->pnum] = 1; - - list_for_each_entry(seb, &si->erase, u.list) - buf[seb->pnum] = 1; - - list_for_each_entry(seb, &si->alien, u.list) - buf[seb->pnum] = 1; - - err = 0; - for (pnum = 0; pnum < ubi->peb_count; pnum++) - if (!buf[pnum]) { - ubi_err("PEB %d is not referred", pnum); - err = 1; - } - - kfree(buf); - if (err) - goto out; - return 0; - -bad_seb: - ubi_err("bad scanning information about LEB %d", seb->lnum); - ubi_dbg_dump_seb(seb, 0); - ubi_dbg_dump_sv(sv); - goto out; - -bad_sv: - ubi_err("bad scanning information about volume %d", sv->vol_id); - ubi_dbg_dump_sv(sv); - goto out; - -bad_vid_hdr: - ubi_err("bad scanning information about volume %d", sv->vol_id); - ubi_dbg_dump_sv(sv); - ubi_dbg_dump_vid_hdr(vidh); - -out: - ubi_dbg_dump_stack(); - return 1; -} - -#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h deleted file mode 100644 index 252b1f1..0000000 --- a/drivers/mtd/ubi/scan.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) International Business Machines Corp., 2006 - * - * SPDX-License-Identifier: GPL-2.0+ - * - * Author: Artem Bityutskiy (Битюцкий Артём) - */ - -#ifndef __UBI_SCAN_H__ -#define __UBI_SCAN_H__ - -/* The erase counter value for this physical eraseblock is unknown */ -#define UBI_SCAN_UNKNOWN_EC (-1) - -/** - * struct ubi_scan_leb - scanning information about a physical eraseblock. - * @ec: erase counter (%UBI_SCAN_UNKNOWN_EC if it is unknown) - * @pnum: physical eraseblock number - * @lnum: logical eraseblock number - * @scrub: if this physical eraseblock needs scrubbing - * @sqnum: sequence number - * @u: unions RB-tree or @list links - * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects - * @u.list: link in one of the eraseblock lists - * @leb_ver: logical eraseblock version (obsolete) - * - * One object of this type is allocated for each physical eraseblock during - * scanning. - */ -struct ubi_scan_leb { - int ec; - int pnum; - int lnum; - int scrub; - unsigned long long sqnum; - union { - struct rb_node rb; - struct list_head list; - } u; - uint32_t leb_ver; -}; - -/** - * struct ubi_scan_volume - scanning information about a volume. - * @vol_id: volume ID - * @highest_lnum: highest logical eraseblock number in this volume - * @leb_count: number of logical eraseblocks in this volume - * @vol_type: volume type - * @used_ebs: number of used logical eraseblocks in this volume (only for - * static volumes) - * @last_data_size: amount of data in the last logical eraseblock of this - * volume (always equivalent to the usable logical eraseblock size in case of - * dynamic volumes) - * @data_pad: how many bytes at the end of logical eraseblocks of this volume - * are not used (due to volume alignment) - * @compat: compatibility flags of this volume - * @rb: link in the volume RB-tree - * @root: root of the RB-tree containing all the eraseblock belonging to this - * volume (&struct ubi_scan_leb objects) - * - * One object of this type is allocated for each volume during scanning. - */ -struct ubi_scan_volume { - int vol_id; - int highest_lnum; - int leb_count; - int vol_type; - int used_ebs; - int last_data_size; - int data_pad; - int compat; - struct rb_node rb; - struct rb_root root; -}; - -/** - * struct ubi_scan_info - UBI scanning information. - * @volumes: root of the volume RB-tree - * @corr: list of corrupted physical eraseblocks - * @free: list of free physical eraseblocks - * @erase: list of physical eraseblocks which have to be erased - * @alien: list of physical eraseblocks which should not be used by UBI (e.g., - * @bad_peb_count: count of bad physical eraseblocks - * those belonging to "preserve"-compatible internal volumes) - * @vols_found: number of volumes found during scanning - * @highest_vol_id: highest volume ID - * @alien_peb_count: count of physical eraseblocks in the @alien list - * @is_empty: flag indicating whether the MTD device is empty or not - * @min_ec: lowest erase counter value - * @max_ec: highest erase counter value - * @max_sqnum: highest sequence number value - * @mean_ec: mean erase counter value - * @ec_sum: a temporary variable used when calculating @mean_ec - * @ec_count: a temporary variable used when calculating @mean_ec - * - * This data structure contains the result of scanning and may be used by other - * UBI units to build final UBI data structures, further error-recovery and so - * on. - */ -struct ubi_scan_info { - struct rb_root volumes; - struct list_head corr; - struct list_head free; - struct list_head erase; - struct list_head alien; - int bad_peb_count; - int vols_found; - int highest_vol_id; - int alien_peb_count; - int is_empty; - int min_ec; - int max_ec; - unsigned long long max_sqnum; - int mean_ec; - uint64_t ec_sum; - int ec_count; -}; - -struct ubi_device; -struct ubi_vid_hdr; - -/* - * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a - * list. - * - * @sv: volume scanning information - * @seb: scanning eraseblock infprmation - * @list: the list to move to - */ -static inline void ubi_scan_move_to_list(struct ubi_scan_volume *sv, - struct ubi_scan_leb *seb, - struct list_head *list) -{ - rb_erase(&seb->u.rb, &sv->root); - list_add_tail(&seb->u.list, list); -} - -int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, - int pnum, int ec, const struct ubi_vid_hdr *vid_hdr, - int bitflips); -struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si, - int vol_id); -struct ubi_scan_leb *ubi_scan_find_seb(const struct ubi_scan_volume *sv, - int lnum); -void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv); -struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, - struct ubi_scan_info *si); -int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si, - int pnum, int ec); -struct ubi_scan_info *ubi_scan(struct ubi_device *ubi); -void ubi_scan_destroy_si(struct ubi_scan_info *si); - -#endif /* !__UBI_SCAN_H__ */ diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h index 9012326..2809805 100644 --- a/drivers/mtd/ubi/ubi-media.h +++ b/drivers/mtd/ubi/ubi-media.h @@ -86,10 +86,11 @@ enum { * Compatibility constants used by internal volumes. * * @UBI_COMPAT_DELETE: delete this internal volume before anything is written - * to the flash + * to the flash * @UBI_COMPAT_RO: attach this device in read-only mode * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its - * physical eraseblocks, don't allow the wear-leveling unit to move them + * physical eraseblocks, don't allow the wear-leveling + * sub-system to move them * @UBI_COMPAT_REJECT: reject this UBI image */ enum { @@ -111,18 +112,19 @@ enum { * struct ubi_ec_hdr - UBI erase counter header. * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC) * @version: version of UBI implementation which is supposed to accept this - * UBI image + * UBI image * @padding1: reserved for future, zeroes * @ec: the erase counter * @vid_hdr_offset: where the VID header starts * @data_offset: where the user data start + * @image_seq: image sequence number * @padding2: reserved for future, zeroes * @hdr_crc: erase counter header CRC checksum * * The erase counter header takes 64 bytes and has a plenty of unused space for * future usage. The unused fields are zeroed. The @version field is used to * indicate the version of UBI implementation which is supposed to be able to - * work with this UBI image. If @version is greater then the current UBI + * work with this UBI image. If @version is greater than the current UBI * version, the image is rejected. This may be useful in future if something * is changed radically. This field is duplicated in the volume identifier * header. @@ -131,6 +133,14 @@ enum { * volume identifier header and user data, relative to the beginning of the * physical eraseblock. These values have to be the same for all physical * eraseblocks. + * + * The @image_seq field is used to validate a UBI image that has been prepared + * for a UBI device. The @image_seq value can be any value, but it must be the + * same on all eraseblocks. UBI will ensure that all new erase counter headers + * also contain this value, and will check the value when attaching the flash. + * One way to make use of @image_seq is to increase its value by one every time + * an image is flashed over an existing image, then, if the flashing does not + * complete, UBI will detect the error when attaching the media. */ struct ubi_ec_hdr { __be32 magic; @@ -139,32 +149,32 @@ struct ubi_ec_hdr { __be64 ec; /* Warning: the current limit is 31-bit anyway! */ __be32 vid_hdr_offset; __be32 data_offset; - __u8 padding2[36]; + __be32 image_seq; + __u8 padding2[32]; __be32 hdr_crc; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_vid_hdr - on-flash UBI volume identifier header. * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC) * @version: UBI implementation version which is supposed to accept this UBI - * image (%UBI_VERSION) + * image (%UBI_VERSION) * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC) * @copy_flag: if this logical eraseblock was copied from another physical - * eraseblock (for wear-leveling reasons) + * eraseblock (for wear-leveling reasons) * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE, - * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) + * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) * @vol_id: ID of this volume * @lnum: logical eraseblock number - * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be - * removed, kept only for not breaking older UBI users) + * @padding1: reserved for future, zeroes * @data_size: how many bytes of data this logical eraseblock contains * @used_ebs: total number of used logical eraseblocks in this volume * @data_pad: how many bytes at the end of this physical eraseblock are not - * used + * used * @data_crc: CRC checksum of the data stored in this logical eraseblock - * @padding1: reserved for future, zeroes - * @sqnum: sequence number * @padding2: reserved for future, zeroes + * @sqnum: sequence number + * @padding3: reserved for future, zeroes * @hdr_crc: volume identifier header CRC checksum * * The @sqnum is the value of the global sequence counter at the time when this @@ -175,7 +185,7 @@ struct ubi_ec_hdr { * (sequence number) is used to distinguish between older and newer versions of * logical eraseblocks. * - * There are 2 situations when there may be more then one physical eraseblock + * There are 2 situations when there may be more than one physical eraseblock * corresponding to the same logical eraseblock, i.e., having the same @vol_id * and @lnum values in the volume identifier header. Suppose we have a logical * eraseblock L and it is mapped to the physical eraseblock P. @@ -212,10 +222,6 @@ struct ubi_ec_hdr { * checksum is correct, this physical eraseblock is selected (P1). Otherwise * the older one (P) is selected. * - * Note, there is an obsolete @leb_ver field which was used instead of @sqnum - * in the past. But it is not used anymore and we keep it in order to be able - * to deal with old UBI images. It will be removed at some point. - * * There are 2 sorts of volumes in UBI: user volumes and internal volumes. * Internal volumes are not seen from outside and are used for various internal * UBI purposes. In this implementation there is only one internal volume - the @@ -236,9 +242,9 @@ struct ubi_ec_hdr { * The @data_crc field contains the CRC checksum of the contents of the logical * eraseblock if this is a static volume. In case of dynamic volumes, it does * not contain the CRC checksum as a rule. The only exception is when the - * data of the physical eraseblock was moved by the wear-leveling unit, then - * the wear-leveling unit calculates the data CRC and stores it in the - * @data_crc field. And of course, the @copy_flag is %in this case. + * data of the physical eraseblock was moved by the wear-leveling sub-system, + * then the wear-leveling sub-system calculates the data CRC and stores it in + * the @data_crc field. And of course, the @copy_flag is %in this case. * * The @data_size field is used only for static volumes because UBI has to know * how many bytes of data are stored in this eraseblock. For dynamic volumes, @@ -265,23 +271,23 @@ struct ubi_vid_hdr { __u8 compat; __be32 vol_id; __be32 lnum; - __be32 leb_ver; /* obsolete, to be removed, don't use */ + __u8 padding1[4]; __be32 data_size; __be32 used_ebs; __be32 data_pad; __be32 data_crc; - __u8 padding1[4]; + __u8 padding2[4]; __be64 sqnum; - __u8 padding2[12]; + __u8 padding3[12]; __be32 hdr_crc; -} __attribute__ ((packed)); +} __packed; /* Internal UBI volumes count */ #define UBI_INT_VOL_COUNT 1 /* - * Starting ID of internal volumes. There is reserved room for 4096 internal - * volumes. + * Starting ID of internal volumes: 0x7fffefff. + * There is reserved room for 4096 internal volumes. */ #define UBI_INTERNAL_VOL_START (0x7FFFFFFF - 4096) @@ -351,10 +357,151 @@ struct ubi_vtbl_record { __u8 vol_type; __u8 upd_marker; __be16 name_len; +#ifndef __UBOOT__ __u8 name[UBI_VOL_NAME_MAX+1]; +#else + char name[UBI_VOL_NAME_MAX+1]; +#endif __u8 flags; __u8 padding[23]; __be32 crc; -} __attribute__ ((packed)); +} __packed; + +/* UBI fastmap on-flash data structures */ + +#define UBI_FM_SB_VOLUME_ID (UBI_LAYOUT_VOLUME_ID + 1) +#define UBI_FM_DATA_VOLUME_ID (UBI_LAYOUT_VOLUME_ID + 2) +/* fastmap on-flash data structure format version */ +#define UBI_FM_FMT_VERSION 1 + +#define UBI_FM_SB_MAGIC 0x7B11D69F +#define UBI_FM_HDR_MAGIC 0xD4B82EF7 +#define UBI_FM_VHDR_MAGIC 0xFA370ED1 +#define UBI_FM_POOL_MAGIC 0x67AF4D08 +#define UBI_FM_EBA_MAGIC 0xf0c040a8 + +/* A fastmap supber block can be located between PEB 0 and + * UBI_FM_MAX_START */ +#define UBI_FM_MAX_START 64 + +/* A fastmap can use up to UBI_FM_MAX_BLOCKS PEBs */ +#define UBI_FM_MAX_BLOCKS 32 + +/* 5% of the total number of PEBs have to be scanned while attaching + * from a fastmap. + * But the size of this pool is limited to be between UBI_FM_MIN_POOL_SIZE and + * UBI_FM_MAX_POOL_SIZE */ +#define UBI_FM_MIN_POOL_SIZE 8 +#define UBI_FM_MAX_POOL_SIZE 256 + +#define UBI_FM_WL_POOL_SIZE 25 + +/** + * struct ubi_fm_sb - UBI fastmap super block + * @magic: fastmap super block magic number (%UBI_FM_SB_MAGIC) + * @version: format version of this fastmap + * @data_crc: CRC over the fastmap data + * @used_blocks: number of PEBs used by this fastmap + * @block_loc: an array containing the location of all PEBs of the fastmap + * @block_ec: the erase counter of each used PEB + * @sqnum: highest sequence number value at the time while taking the fastmap + * + */ +struct ubi_fm_sb { + __be32 magic; + __u8 version; + __u8 padding1[3]; + __be32 data_crc; + __be32 used_blocks; + __be32 block_loc[UBI_FM_MAX_BLOCKS]; + __be32 block_ec[UBI_FM_MAX_BLOCKS]; + __be64 sqnum; + __u8 padding2[32]; +} __packed; + +/** + * struct ubi_fm_hdr - header of the fastmap data set + * @magic: fastmap header magic number (%UBI_FM_HDR_MAGIC) + * @free_peb_count: number of free PEBs known by this fastmap + * @used_peb_count: number of used PEBs known by this fastmap + * @scrub_peb_count: number of to be scrubbed PEBs known by this fastmap + * @bad_peb_count: number of bad PEBs known by this fastmap + * @erase_peb_count: number of bad PEBs which have to be erased + * @vol_count: number of UBI volumes known by this fastmap + */ +struct ubi_fm_hdr { + __be32 magic; + __be32 free_peb_count; + __be32 used_peb_count; + __be32 scrub_peb_count; + __be32 bad_peb_count; + __be32 erase_peb_count; + __be32 vol_count; + __u8 padding[4]; +} __packed; + +/* struct ubi_fm_hdr is followed by two struct ubi_fm_scan_pool */ + +/** + * struct ubi_fm_scan_pool - Fastmap pool PEBs to be scanned while attaching + * @magic: pool magic numer (%UBI_FM_POOL_MAGIC) + * @size: current pool size + * @max_size: maximal pool size + * @pebs: an array containing the location of all PEBs in this pool + */ +struct ubi_fm_scan_pool { + __be32 magic; + __be16 size; + __be16 max_size; + __be32 pebs[UBI_FM_MAX_POOL_SIZE]; + __be32 padding[4]; +} __packed; + +/* ubi_fm_scan_pool is followed by nfree+nused struct ubi_fm_ec records */ + +/** + * struct ubi_fm_ec - stores the erase counter of a PEB + * @pnum: PEB number + * @ec: ec of this PEB + */ +struct ubi_fm_ec { + __be32 pnum; + __be32 ec; +} __packed; + +/** + * struct ubi_fm_volhdr - Fastmap volume header + * it identifies the start of an eba table + * @magic: Fastmap volume header magic number (%UBI_FM_VHDR_MAGIC) + * @vol_id: volume id of the fastmapped volume + * @vol_type: type of the fastmapped volume + * @data_pad: data_pad value of the fastmapped volume + * @used_ebs: number of used LEBs within this volume + * @last_eb_bytes: number of bytes used in the last LEB + */ +struct ubi_fm_volhdr { + __be32 magic; + __be32 vol_id; + __u8 vol_type; + __u8 padding1[3]; + __be32 data_pad; + __be32 used_ebs; + __be32 last_eb_bytes; + __u8 padding2[8]; +} __packed; + +/* struct ubi_fm_volhdr is followed by one struct ubi_fm_eba records */ + +/** + * struct ubi_fm_eba - denotes an association beween a PEB and LEB + * @magic: EBA table magic number + * @reserved_pebs: number of table entries + * @pnum: PEB number of LEB (LEB is the index) + */ +struct ubi_fm_eba { + __be32 magic; + __be32 reserved_pebs; + __be32 pnum[0]; +} __packed; #endif /* !__UBI_MEDIA_H__ */ diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index f4f7165..1c39573 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -10,7 +10,8 @@ #ifndef __UBI_UBI_H__ #define __UBI_UBI_H__ -#ifdef UBI_LINUX +#define __UBOOT__ +#ifndef __UBOOT__ #include <linux/init.h> #include <linux/types.h> #include <linux/list.h> @@ -23,22 +24,18 @@ #include <linux/fs.h> #include <linux/cdev.h> #include <linux/device.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/vmalloc.h> -#include <linux/mtd/mtd.h> -#include <linux/mtd/ubi.h> +#include <linux/notifier.h> +#include <asm/pgtable.h> +#else +#include <ubi_uboot.h> #endif - -#include <linux/types.h> -#include <linux/list.h> -#include <linux/rbtree.h> -#include <linux/string.h> #include <linux/mtd/mtd.h> #include <linux/mtd/ubi.h> - #include "ubi-media.h" -#include "scan.h" -#include "debug.h" +#include <mtd/ubi-user.h> /* Maximum number of supported UBI devices */ #define UBI_MAX_DEVICES 32 @@ -52,20 +49,21 @@ #else #define ubi_msg(fmt, ...) printk(KERN_NOTICE "UBI: " fmt "\n", ##__VA_ARGS__) #endif + /* UBI warning messages */ -#define ubi_warn(fmt, ...) printk(KERN_WARNING "UBI warning: %s: " fmt "\n", \ - __func__, ##__VA_ARGS__) +#define ubi_warn(fmt, ...) pr_warn("UBI warning: %s: " fmt "\n", \ + __func__, ##__VA_ARGS__) /* UBI error messages */ -#define ubi_err(fmt, ...) printk(KERN_ERR "UBI error: %s: " fmt "\n", \ +#define ubi_err(fmt, ...) pr_err("UBI error: %s: " fmt "\n", \ __func__, ##__VA_ARGS__) -/* Lowest number PEBs reserved for bad PEB handling */ -#define MIN_RESEVED_PEBS 2 - /* Background thread name pattern */ #define UBI_BGT_NAME_PATTERN "ubi_bgt%dd" -/* This marker in the EBA table means that the LEB is um-mapped */ +/* + * This marker in the EBA table means that the LEB is um-mapped. + * NOTE! It has to have the same value as %UBI_ALL. + */ #define UBI_LEB_UNMAPPED -1 /* @@ -75,37 +73,98 @@ #define UBI_IO_RETRIES 3 /* - * Error codes returned by the I/O unit. - * - * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only - * 0xFF bytes - * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a - * valid erase counter header, and the rest are %0xFF bytes - * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC) - * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or - * CRC) + * Length of the protection queue. The length is effectively equivalent to the + * number of (global) erase cycles PEBs are protected from the wear-leveling + * worker. + */ +#define UBI_PROT_QUEUE_LEN 10 + +/* The volume ID/LEB number/erase counter is unknown */ +#define UBI_UNKNOWN -1 + +/* + * The UBI debugfs directory name pattern and maximum name length (3 for "ubi" + * + 2 for the number plus 1 for the trailing zero byte. + */ +#define UBI_DFS_DIR_NAME "ubi%d" +#define UBI_DFS_DIR_LEN (3 + 2 + 1) + +/* + * Error codes returned by the I/O sub-system. + * + * UBI_IO_FF: the read region of flash contains only 0xFFs + * UBI_IO_FF_BITFLIPS: the same as %UBI_IO_FF, but also also there was a data + * integrity error reported by the MTD driver + * (uncorrectable ECC error in case of NAND) + * UBI_IO_BAD_HDR: the EC or VID header is corrupted (bad magic or CRC) + * UBI_IO_BAD_HDR_EBADMSG: the same as %UBI_IO_BAD_HDR, but also there was a + * data integrity error reported by the MTD driver + * (uncorrectable ECC error in case of NAND) * UBI_IO_BITFLIPS: bit-flips were detected and corrected + * + * Note, it is probably better to have bit-flip and ebadmsg as flags which can + * be or'ed with other error code. But this is a big change because there are + * may callers, so it does not worth the risk of introducing a bug + */ +enum { + UBI_IO_FF = 1, + UBI_IO_FF_BITFLIPS, + UBI_IO_BAD_HDR, + UBI_IO_BAD_HDR_EBADMSG, + UBI_IO_BITFLIPS, +}; + +/* + * Return codes of the 'ubi_eba_copy_leb()' function. + * + * MOVE_CANCEL_RACE: canceled because the volume is being deleted, the source + * PEB was put meanwhile, or there is I/O on the source PEB + * MOVE_SOURCE_RD_ERR: canceled because there was a read error from the source + * PEB + * MOVE_TARGET_RD_ERR: canceled because there was a read error from the target + * PEB + * MOVE_TARGET_WR_ERR: canceled because there was a write error to the target + * PEB + * MOVE_TARGET_BITFLIPS: canceled because a bit-flip was detected in the + * target PEB + * MOVE_RETRY: retry scrubbing the PEB */ enum { - UBI_IO_PEB_EMPTY = 1, - UBI_IO_PEB_FREE, - UBI_IO_BAD_EC_HDR, - UBI_IO_BAD_VID_HDR, - UBI_IO_BITFLIPS + MOVE_CANCEL_RACE = 1, + MOVE_SOURCE_RD_ERR, + MOVE_TARGET_RD_ERR, + MOVE_TARGET_WR_ERR, + MOVE_TARGET_BITFLIPS, + MOVE_RETRY, +}; + +/* + * Return codes of the fastmap sub-system + * + * UBI_NO_FASTMAP: No fastmap super block was found + * UBI_BAD_FASTMAP: A fastmap was found but it's unusable + */ +enum { + UBI_NO_FASTMAP = 1, + UBI_BAD_FASTMAP, }; /** * struct ubi_wl_entry - wear-leveling entry. - * @rb: link in the corresponding RB-tree + * @u.rb: link in the corresponding (free/used) RB-tree + * @u.list: link in the protection queue * @ec: erase counter * @pnum: physical eraseblock number * - * This data structure is used in the WL unit. Each physical eraseblock has a - * corresponding &struct wl_entry object which may be kept in different - * RB-trees. See WL unit for details. + * This data structure is used in the WL sub-system. Each physical eraseblock + * has a corresponding &struct wl_entry object which may be kept in different + * RB-trees. See WL sub-system for details. */ struct ubi_wl_entry { - struct rb_node rb; + union { + struct rb_node rb; + struct list_head list; + } u; int ec; int pnum; }; @@ -119,10 +178,10 @@ struct ubi_wl_entry { * @mutex: read/write mutex to implement read/write access serialization to * the (@vol_id, @lnum) logical eraseblock * - * This data structure is used in the EBA unit to implement per-LEB locking. - * When a logical eraseblock is being locked - corresponding + * This data structure is used in the EBA sub-system to implement per-LEB + * locking. When a logical eraseblock is being locked - corresponding * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree). - * See EBA unit for details. + * See EBA sub-system for details. */ struct ubi_ltree_entry { struct rb_node rb; @@ -132,9 +191,65 @@ struct ubi_ltree_entry { struct rw_semaphore mutex; }; +/** + * struct ubi_rename_entry - volume re-name description data structure. + * @new_name_len: new volume name length + * @new_name: new volume name + * @remove: if not zero, this volume should be removed, not re-named + * @desc: descriptor of the volume + * @list: links re-name entries into a list + * + * This data structure is utilized in the multiple volume re-name code. Namely, + * UBI first creates a list of &struct ubi_rename_entry objects from the + * &struct ubi_rnvol_req request object, and then utilizes this list to do all + * the job. + */ +struct ubi_rename_entry { + int new_name_len; + char new_name[UBI_VOL_NAME_MAX + 1]; + int remove; + struct ubi_volume_desc *desc; + struct list_head list; +}; + struct ubi_volume_desc; /** + * struct ubi_fastmap_layout - in-memory fastmap data structure. + * @e: PEBs used by the current fastmap + * @to_be_tortured: if non-zero tortured this PEB + * @used_blocks: number of used PEBs + * @max_pool_size: maximal size of the user pool + * @max_wl_pool_size: maximal size of the pool used by the WL sub-system + */ +struct ubi_fastmap_layout { + struct ubi_wl_entry *e[UBI_FM_MAX_BLOCKS]; + int to_be_tortured[UBI_FM_MAX_BLOCKS]; + int used_blocks; + int max_pool_size; + int max_wl_pool_size; +}; + +/** + * struct ubi_fm_pool - in-memory fastmap pool + * @pebs: PEBs in this pool + * @used: number of used PEBs + * @size: total number of PEBs in this pool + * @max_size: maximal size of the pool + * + * A pool gets filled with up to max_size. + * If all PEBs within the pool are used a new fastmap will be written + * to the flash and the pool gets refilled with empty PEBs. + * + */ +struct ubi_fm_pool { + int pebs[UBI_FM_MAX_POOL_SIZE]; + int used; + int size; + int max_size; +}; + +/** * struct ubi_volume - UBI volume description data structure. * @dev: device object to make use of the the Linux device model * @cdev: character device object to create character device @@ -160,8 +275,6 @@ struct ubi_volume_desc; * @upd_ebs: how many eraseblocks are expected to be updated * @ch_lnum: LEB number which is being changing by the atomic LEB change * operation - * @ch_dtype: data persistency type which is being changing by the atomic LEB - * change operation * @upd_bytes: how many bytes are expected to be received for volume update or * atomic LEB change * @upd_received: how many bytes were already received for volume update or @@ -175,10 +288,7 @@ struct ubi_volume_desc; * @upd_marker: %1 if the update marker is set for this volume * @updating: %1 if the volume is being updated * @changing_leb: %1 if the atomic LEB change ioctl command is in progress - * - * @gluebi_desc: gluebi UBI volume descriptor - * @gluebi_refcount: reference count of the gluebi MTD device - * @gluebi_mtd: MTD device description object of the gluebi MTD device + * @direct_writes: %1 if direct writes are enabled for this volume * * The @corrupted field indicates that the volume's contents is corrupted. * Since UBI protects only static volumes, this field is not relevant to @@ -202,16 +312,19 @@ struct ubi_volume { int vol_type; int usable_leb_size; int used_ebs; +#ifndef __UBOOT__ int last_eb_bytes; +#else + u32 last_eb_bytes; +#endif long long used_bytes; int alignment; int data_pad; int name_len; - char name[UBI_VOL_NAME_MAX+1]; + char name[UBI_VOL_NAME_MAX + 1]; int upd_ebs; int ch_lnum; - int ch_dtype; long long upd_bytes; long long upd_received; void *upd_buf; @@ -222,22 +335,11 @@ struct ubi_volume { unsigned int upd_marker:1; unsigned int updating:1; unsigned int changing_leb:1; - -#ifdef CONFIG_MTD_UBI_GLUEBI - /* - * Gluebi-related stuff may be compiled out. - * TODO: this should not be built into UBI but should be a separate - * ubimtd driver which works on top of UBI and emulates MTD devices. - */ - struct ubi_volume_desc *gluebi_desc; - int gluebi_refcount; - struct mtd_info gluebi_mtd; -#endif + unsigned int direct_writes:1; }; /** - * struct ubi_volume_desc - descriptor of the UBI volume returned when it is - * opened. + * struct ubi_volume_desc - UBI volume descriptor returned when it is opened. * @vol: reference to the corresponding volume description object * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE) */ @@ -249,6 +351,37 @@ struct ubi_volume_desc { struct ubi_wl_entry; /** + * struct ubi_debug_info - debugging information for an UBI device. + * + * @chk_gen: if UBI general extra checks are enabled + * @chk_io: if UBI I/O extra checks are enabled + * @disable_bgt: disable the background task for testing purposes + * @emulate_bitflips: emulate bit-flips for testing purposes + * @emulate_io_failures: emulate write/erase failures for testing purposes + * @dfs_dir_name: name of debugfs directory containing files of this UBI device + * @dfs_dir: direntry object of the UBI device debugfs directory + * @dfs_chk_gen: debugfs knob to enable UBI general extra checks + * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks + * @dfs_disable_bgt: debugfs knob to disable the background task + * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips + * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures + */ +struct ubi_debug_info { + unsigned int chk_gen:1; + unsigned int chk_io:1; + unsigned int disable_bgt:1; + unsigned int emulate_bitflips:1; + unsigned int emulate_io_failures:1; + char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; + struct dentry *dfs_dir; + struct dentry *dfs_chk_gen; + struct dentry *dfs_chk_io; + struct dentry *dfs_disable_bgt; + struct dentry *dfs_emulate_bitflips; + struct dentry *dfs_emulate_io_failures; +}; + +/** * struct ubi_device - UBI device description structure * @dev: UBI device object to use the the Linux device model * @cdev: character device object to create character device @@ -261,6 +394,7 @@ struct ubi_wl_entry; * @vol->readers, @vol->writers, @vol->exclusive, * @vol->ref_count, @vol->mapping and @vol->eba_tbl. * @ref_count: count of references on the UBI device + * @image_seq: image sequence number recorded on EC headers * * @rsvd_pebs: count of reserved physical eraseblocks * @avail_pebs: count of available physical eraseblocks @@ -269,12 +403,13 @@ struct ubi_wl_entry; * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling * * @autoresize_vol_id: ID of the volume which has to be auto-resized at the end - * of UBI ititializetion + * of UBI initialization * @vtbl_slots: how many slots are available in the volume table * @vtbl_size: size of the volume table in bytes * @vtbl: in-RAM volume table copy - * @volumes_mutex: protects on-flash volume table and serializes volume - * changes, like creation, deletion, update, resize + * @device_mutex: protects on-flash volume table and serializes volume + * creation, deletion, update, re-size, re-name and set + * property * * @max_ec: current highest erase counter value * @mean_ec: current mean erase counter value @@ -284,20 +419,33 @@ struct ubi_wl_entry; * @ltree: the lock tree * @alc_mutex: serializes "atomic LEB change" operations * + * @fm_disabled: non-zero if fastmap is disabled (default) + * @fm: in-memory data structure of the currently used fastmap + * @fm_pool: in-memory data structure of the fastmap pool + * @fm_wl_pool: in-memory data structure of the fastmap pool used by the WL + * sub-system + * @fm_mutex: serializes ubi_update_fastmap() and protects @fm_buf + * @fm_buf: vmalloc()'d buffer which holds the raw fastmap + * @fm_size: fastmap size in bytes + * @fm_sem: allows ubi_update_fastmap() to block EBA table changes + * @fm_work: fastmap work queue + * * @used: RB-tree of used physical eraseblocks + * @erroneous: RB-tree of erroneous used physical eraseblocks * @free: RB-tree of free physical eraseblocks + * @free_count: Contains the number of elements in @free * @scrub: RB-tree of physical eraseblocks which need scrubbing - * @prot: protection trees - * @prot.pnum: protection tree indexed by physical eraseblock numbers - * @prot.aec: protection tree indexed by absolute erase counter value - * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from, - * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works - * fields + * @pq: protection queue (contain physical eraseblocks which are temporarily + * protected from the wear-leveling worker) + * @pq_head: protection queue head + * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, + * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, + * @erroneous, and @erroneous_peb_count fields * @move_mutex: serializes eraseblock moves + * @work_sem: synchronizes the WL worker with use tasks * @wl_scheduled: non-zero if the wear-leveling was scheduled * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any * physical eraseblock - * @abs_ec: absolute erase counter * @move_from: physical eraseblock from where the data is being moved * @move_to: physical eraseblock where the data is being moved to * @move_to_put: if the "to" PEB was put @@ -310,30 +458,38 @@ struct ubi_wl_entry; * @flash_size: underlying MTD device size (in bytes) * @peb_count: count of physical eraseblocks on the MTD device * @peb_size: physical eraseblock size + * @bad_peb_limit: top limit of expected bad physical eraseblocks * @bad_peb_count: count of bad physical eraseblocks * @good_peb_count: count of good physical eraseblocks + * @corr_peb_count: count of corrupted physical eraseblocks (preserved and not + * used by UBI) + * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous + * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks * @min_io_size: minimal input/output unit size of the underlying MTD device * @hdrs_min_io_size: minimal I/O unit size used for VID and EC headers * @ro_mode: if the UBI device is in read-only mode * @leb_size: logical eraseblock size * @leb_start: starting offset of logical eraseblocks within physical - * eraseblocks + * eraseblocks * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size * @vid_hdr_offset: starting offset of the volume identifier header (might be - * unaligned) + * unaligned) * @vid_hdr_aloffset: starting offset of the VID header aligned to * @hdrs_min_io_size * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or * not + * @nor_flash: non-zero if working on top of NOR flash + * @max_write_size: maximum amount of bytes the underlying flash can write at a + * time (MTD write buffer size) * @mtd: MTD device descriptor * - * @peb_buf1: a buffer of PEB size used for different purposes - * @peb_buf2: another buffer of PEB size used for different purposes - * @buf_mutex: proptects @peb_buf1 and @peb_buf2 - * @dbg_peb_buf: buffer of PEB size used for debugging - * @dbg_buf_mutex: proptects @dbg_peb_buf + * @peb_buf: a buffer of PEB size used for different purposes + * @buf_mutex: protects @peb_buf + * @ckvol_mutex: serializes static volume checking when opening + * + * @dbg: debugging information for this UBI device */ struct ubi_device { struct cdev cdev; @@ -344,42 +500,56 @@ struct ubi_device { struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT]; spinlock_t volumes_lock; int ref_count; + int image_seq; int rsvd_pebs; int avail_pebs; int beb_rsvd_pebs; int beb_rsvd_level; + int bad_peb_limit; int autoresize_vol_id; int vtbl_slots; int vtbl_size; struct ubi_vtbl_record *vtbl; - struct mutex volumes_mutex; + struct mutex device_mutex; int max_ec; - /* TODO: mean_ec is not updated run-time, fix */ + /* Note, mean_ec is not updated run-time - should be fixed */ int mean_ec; - /* EBA unit's stuff */ + /* EBA sub-system's stuff */ unsigned long long global_sqnum; spinlock_t ltree_lock; struct rb_root ltree; struct mutex alc_mutex; - /* Wear-leveling unit's stuff */ + /* Fastmap stuff */ + int fm_disabled; + struct ubi_fastmap_layout *fm; + struct ubi_fm_pool fm_pool; + struct ubi_fm_pool fm_wl_pool; + struct rw_semaphore fm_sem; + struct mutex fm_mutex; + void *fm_buf; + size_t fm_size; +#ifndef __UBOOT__ + struct work_struct fm_work; +#endif + + /* Wear-leveling sub-system's stuff */ struct rb_root used; + struct rb_root erroneous; struct rb_root free; + int free_count; struct rb_root scrub; - struct { - struct rb_root pnum; - struct rb_root aec; - } prot; + struct list_head pq[UBI_PROT_QUEUE_LEN]; + int pq_head; spinlock_t wl_lock; struct mutex move_mutex; struct rw_semaphore work_sem; int wl_scheduled; struct ubi_wl_entry **lookuptbl; - unsigned long long abs_ec; struct ubi_wl_entry *move_from; struct ubi_wl_entry *move_to; int move_to_put; @@ -389,12 +559,15 @@ struct ubi_device { int thread_enabled; char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2]; - /* I/O unit's stuff */ + /* I/O sub-system's stuff */ long long flash_size; int peb_count; int peb_size; int bad_peb_count; int good_peb_count; + int corr_peb_count; + int erroneous_peb_count; + int max_erroneous; int min_io_size; int hdrs_min_io_size; int ro_mode; @@ -405,35 +578,195 @@ struct ubi_device { int vid_hdr_offset; int vid_hdr_aloffset; int vid_hdr_shift; - int bad_allowed; + unsigned int bad_allowed:1; + unsigned int nor_flash:1; + int max_write_size; struct mtd_info *mtd; - void *peb_buf1; - void *peb_buf2; + void *peb_buf; struct mutex buf_mutex; struct mutex ckvol_mutex; -#ifdef CONFIG_MTD_UBI_DEBUG - void *dbg_peb_buf; - struct mutex dbg_buf_mutex; -#endif + + struct ubi_debug_info dbg; }; +/** + * struct ubi_ainf_peb - attach information about a physical eraseblock. + * @ec: erase counter (%UBI_UNKNOWN if it is unknown) + * @pnum: physical eraseblock number + * @vol_id: ID of the volume this LEB belongs to + * @lnum: logical eraseblock number + * @scrub: if this physical eraseblock needs scrubbing + * @copy_flag: this LEB is a copy (@copy_flag is set in VID header of this LEB) + * @sqnum: sequence number + * @u: unions RB-tree or @list links + * @u.rb: link in the per-volume RB-tree of &struct ubi_ainf_peb objects + * @u.list: link in one of the eraseblock lists + * + * One object of this type is allocated for each physical eraseblock when + * attaching an MTD device. Note, if this PEB does not belong to any LEB / + * volume, the @vol_id and @lnum fields are initialized to %UBI_UNKNOWN. + */ +struct ubi_ainf_peb { + int ec; + int pnum; + int vol_id; + int lnum; + unsigned int scrub:1; + unsigned int copy_flag:1; + unsigned long long sqnum; + union { + struct rb_node rb; + struct list_head list; + } u; +}; + +/** + * struct ubi_ainf_volume - attaching information about a volume. + * @vol_id: volume ID + * @highest_lnum: highest logical eraseblock number in this volume + * @leb_count: number of logical eraseblocks in this volume + * @vol_type: volume type + * @used_ebs: number of used logical eraseblocks in this volume (only for + * static volumes) + * @last_data_size: amount of data in the last logical eraseblock of this + * volume (always equivalent to the usable logical eraseblock + * size in case of dynamic volumes) + * @data_pad: how many bytes at the end of logical eraseblocks of this volume + * are not used (due to volume alignment) + * @compat: compatibility flags of this volume + * @rb: link in the volume RB-tree + * @root: root of the RB-tree containing all the eraseblock belonging to this + * volume (&struct ubi_ainf_peb objects) + * + * One object of this type is allocated for each volume when attaching an MTD + * device. + */ +struct ubi_ainf_volume { + int vol_id; + int highest_lnum; + int leb_count; + int vol_type; + int used_ebs; + int last_data_size; + int data_pad; + int compat; + struct rb_node rb; + struct rb_root root; +}; + +/** + * struct ubi_attach_info - MTD device attaching information. + * @volumes: root of the volume RB-tree + * @corr: list of corrupted physical eraseblocks + * @free: list of free physical eraseblocks + * @erase: list of physical eraseblocks which have to be erased + * @alien: list of physical eraseblocks which should not be used by UBI (e.g., + * those belonging to "preserve"-compatible internal volumes) + * @corr_peb_count: count of PEBs in the @corr list + * @empty_peb_count: count of PEBs which are presumably empty (contain only + * 0xFF bytes) + * @alien_peb_count: count of PEBs in the @alien list + * @bad_peb_count: count of bad physical eraseblocks + * @maybe_bad_peb_count: count of bad physical eraseblocks which are not marked + * as bad yet, but which look like bad + * @vols_found: number of volumes found + * @highest_vol_id: highest volume ID + * @is_empty: flag indicating whether the MTD device is empty or not + * @min_ec: lowest erase counter value + * @max_ec: highest erase counter value + * @max_sqnum: highest sequence number value + * @mean_ec: mean erase counter value + * @ec_sum: a temporary variable used when calculating @mean_ec + * @ec_count: a temporary variable used when calculating @mean_ec + * @aeb_slab_cache: slab cache for &struct ubi_ainf_peb objects + * + * This data structure contains the result of attaching an MTD device and may + * be used by other UBI sub-systems to build final UBI data structures, further + * error-recovery and so on. + */ +struct ubi_attach_info { + struct rb_root volumes; + struct list_head corr; + struct list_head free; + struct list_head erase; + struct list_head alien; + int corr_peb_count; + int empty_peb_count; + int alien_peb_count; + int bad_peb_count; + int maybe_bad_peb_count; + int vols_found; + int highest_vol_id; + int is_empty; + int min_ec; + int max_ec; + unsigned long long max_sqnum; + int mean_ec; + uint64_t ec_sum; + int ec_count; + struct kmem_cache *aeb_slab_cache; +}; + +/** + * struct ubi_work - UBI work description data structure. + * @list: a link in the list of pending works + * @func: worker function + * @e: physical eraseblock to erase + * @vol_id: the volume ID on which this erasure is being performed + * @lnum: the logical eraseblock number + * @torture: if the physical eraseblock has to be tortured + * @anchor: produce a anchor PEB to by used by fastmap + * + * The @func pointer points to the worker function. If the @cancel argument is + * not zero, the worker has to free the resources and exit immediately. The + * worker has to return zero in case of success and a negative error code in + * case of failure. + */ +struct ubi_work { + struct list_head list; + int (*func)(struct ubi_device *ubi, struct ubi_work *wrk, int cancel); + /* The below fields are only relevant to erasure works */ + struct ubi_wl_entry *e; + int vol_id; + int lnum; + int torture; + int anchor; +}; + +#include "debug.h" + extern struct kmem_cache *ubi_wl_entry_slab; -extern struct file_operations ubi_ctrl_cdev_operations; -extern struct file_operations ubi_cdev_operations; -extern struct file_operations ubi_vol_cdev_operations; +extern const struct file_operations ubi_ctrl_cdev_operations; +extern const struct file_operations ubi_cdev_operations; +extern const struct file_operations ubi_vol_cdev_operations; extern struct class *ubi_class; extern struct mutex ubi_devices_mutex; +extern struct blocking_notifier_head ubi_notifiers; + +/* attach.c */ +int ubi_add_to_av(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum, + int ec, const struct ubi_vid_hdr *vid_hdr, int bitflips); +struct ubi_ainf_volume *ubi_find_av(const struct ubi_attach_info *ai, + int vol_id); +void ubi_remove_av(struct ubi_attach_info *ai, struct ubi_ainf_volume *av); +struct ubi_ainf_peb *ubi_early_get_peb(struct ubi_device *ubi, + struct ubi_attach_info *ai); +int ubi_attach(struct ubi_device *ubi, int force_scan); +void ubi_destroy_ai(struct ubi_attach_info *ai); /* vtbl.c */ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, struct ubi_vtbl_record *vtbl_rec); -int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si); +int ubi_vtbl_rename_volumes(struct ubi_device *ubi, + struct list_head *rename_list); +int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai); /* vmt.c */ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req); -int ubi_remove_volume(struct ubi_volume_desc *desc); +int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl); int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs); +int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list); int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol); void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol); @@ -448,9 +781,12 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, const void __user *buf, int count); /* misc.c */ -int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length); +int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, + int length); int ubi_check_volume(struct ubi_device *ubi, int vol_id); +void ubi_update_reserved(struct ubi_device *ubi); void ubi_calculate_reserved(struct ubi_device *ubi); +int ubi_check_pattern(const void *buf, uint8_t patt, int size); /* gluebi.c */ #ifdef CONFIG_MTD_UBI_GLUEBI @@ -474,25 +810,33 @@ int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, void *buf, int offset, int len, int check); int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, - const void *buf, int offset, int len, int dtype); + const void *buf, int offset, int len); int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, - int lnum, const void *buf, int len, int dtype, - int used_ebs); + int lnum, const void *buf, int len, int used_ebs); int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, - int lnum, const void *buf, int len, int dtype); + int lnum, const void *buf, int len); int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, struct ubi_vid_hdr *vid_hdr); -int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); -void ubi_eba_close(const struct ubi_device *ubi); +int ubi_eba_init(struct ubi_device *ubi, struct ubi_attach_info *ai); +unsigned long long ubi_next_sqnum(struct ubi_device *ubi); +int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, + struct ubi_attach_info *ai_scan); /* wl.c */ -int ubi_wl_get_peb(struct ubi_device *ubi, int dtype); -int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture); -int ubi_wl_flush(struct ubi_device *ubi); +int ubi_wl_get_peb(struct ubi_device *ubi); +int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum, + int pnum, int torture); +int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum); int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum); -int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); +int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai); void ubi_wl_close(struct ubi_device *ubi); int ubi_thread(void *u); +struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor); +int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *used_e, + int lnum, int torture); +int ubi_is_erase_work(struct ubi_work *wrk); +void ubi_refill_pools(struct ubi_device *ubi); +int ubi_ensure_anchor_pebs(struct ubi_device *ubi); /* io.c */ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, @@ -512,16 +856,37 @@ int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, struct ubi_vid_hdr *vid_hdr); /* build.c */ -int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset); +int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, + int vid_hdr_offset, int max_beb_per1024); int ubi_detach_mtd_dev(int ubi_num, int anyway); struct ubi_device *ubi_get_device(int ubi_num); void ubi_put_device(struct ubi_device *ubi); struct ubi_device *ubi_get_by_major(int major); int ubi_major2num(int major); +int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, + int ntype); +int ubi_notify_all(struct ubi_device *ubi, int ntype, + struct notifier_block *nb); +int ubi_enumerate_volumes(struct notifier_block *nb); +void ubi_free_internal_volumes(struct ubi_device *ubi); + +/* kapi.c */ +void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di); +void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol, + struct ubi_volume_info *vi); +/* scan.c */ +int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, + int pnum, const struct ubi_vid_hdr *vid_hdr); + +/* fastmap.c */ +size_t ubi_calc_fm_size(struct ubi_device *ubi); +int ubi_update_fastmap(struct ubi_device *ubi); +int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, + int fm_anchor); /* * ubi_rb_for_each_entry - walk an RB-tree. - * @rb: a pointer to type 'struct rb_node' to to use as a loop counter + * @rb: a pointer to type 'struct rb_node' to use as a loop counter * @pos: a pointer to RB-tree entry type to use as a loop counter * @root: RB-tree's root * @member: the name of the 'struct rb_node' within the RB-tree entry @@ -530,7 +895,23 @@ int ubi_major2num(int major); for (rb = rb_first(root), \ pos = (rb ? container_of(rb, typeof(*pos), member) : NULL); \ rb; \ - rb = rb_next(rb), pos = container_of(rb, typeof(*pos), member)) + rb = rb_next(rb), \ + pos = (rb ? container_of(rb, typeof(*pos), member) : NULL)) + +/* + * ubi_move_aeb_to_list - move a PEB from the volume tree to a list. + * + * @av: volume attaching information + * @aeb: attaching eraseblock information + * @list: the list to move to + */ +static inline void ubi_move_aeb_to_list(struct ubi_ainf_volume *av, + struct ubi_ainf_peb *aeb, + struct list_head *list) +{ + rb_erase(&aeb->u.rb, &av->root); + list_add_tail(&aeb->u.list, list); +} /** * ubi_zalloc_vid_hdr - allocate a volume identifier header object. @@ -606,6 +987,7 @@ static inline void ubi_ro_mode(struct ubi_device *ubi) if (!ubi->ro_mode) { ubi->ro_mode = 1; ubi_warn("switch to read-only mode"); + dump_stack(); } } diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index e597f82..220c120 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -26,13 +26,16 @@ * transaction with a roll-back capability. */ -#ifdef UBI_LINUX -#include <linux/err.h> -#include <asm/uaccess.h> -#include <asm/div64.h> +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/uaccess.h> +#else +#include <div64.h> +#include <ubi_uboot.h> #endif +#include <linux/err.h> +#include <linux/math64.h> -#include <ubi_uboot.h> #include "ubi.h" /** @@ -48,22 +51,21 @@ static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol) int err; struct ubi_vtbl_record vtbl_rec; - dbg_msg("set update marker for volume %d", vol->vol_id); + dbg_gen("set update marker for volume %d", vol->vol_id); if (vol->upd_marker) { ubi_assert(ubi->vtbl[vol->vol_id].upd_marker); - dbg_msg("already set"); + dbg_gen("already set"); return 0; } - memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], - sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol->vol_id]; vtbl_rec.upd_marker = 1; - mutex_lock(&ubi->volumes_mutex); + mutex_lock(&ubi->device_mutex); err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); - mutex_unlock(&ubi->volumes_mutex); vol->upd_marker = 1; + mutex_unlock(&ubi->device_mutex); return err; } @@ -81,31 +83,29 @@ static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol, long long bytes) { int err; - uint64_t tmp; struct ubi_vtbl_record vtbl_rec; - dbg_msg("clear update marker for volume %d", vol->vol_id); + dbg_gen("clear update marker for volume %d", vol->vol_id); - memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], - sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol->vol_id]; ubi_assert(vol->upd_marker && vtbl_rec.upd_marker); vtbl_rec.upd_marker = 0; if (vol->vol_type == UBI_STATIC_VOLUME) { vol->corrupted = 0; - vol->used_bytes = tmp = bytes; - vol->last_eb_bytes = do_div(tmp, vol->usable_leb_size); - vol->used_ebs = tmp; + vol->used_bytes = bytes; + vol->used_ebs = div_u64_rem(bytes, vol->usable_leb_size, + &vol->last_eb_bytes); if (vol->last_eb_bytes) vol->used_ebs += 1; else vol->last_eb_bytes = vol->usable_leb_size; } - mutex_lock(&ubi->volumes_mutex); + mutex_lock(&ubi->device_mutex); err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); - mutex_unlock(&ubi->volumes_mutex); vol->upd_marker = 0; + mutex_unlock(&ubi->device_mutex); return err; } @@ -123,9 +123,8 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, long long bytes) { int i, err; - uint64_t tmp; - dbg_msg("start update of volume %d, %llu bytes", vol->vol_id, bytes); + dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes); ubi_assert(!vol->updating && !vol->changing_leb); vol->updating = 1; @@ -141,21 +140,23 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, } if (bytes == 0) { + err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL); + if (err) + return err; + err = clear_update_marker(ubi, vol, 0); if (err) return err; - err = ubi_wl_flush(ubi); - if (!err) - vol->updating = 0; + vol->updating = 0; + return 0; } vol->upd_buf = vmalloc(ubi->leb_size); if (!vol->upd_buf) return -ENOMEM; - tmp = bytes; - vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size); - vol->upd_ebs += tmp; + vol->upd_ebs = div_u64(bytes + vol->usable_leb_size - 1, + vol->usable_leb_size); vol->upd_bytes = bytes; vol->upd_received = 0; return 0; @@ -175,17 +176,15 @@ int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, { ubi_assert(!vol->updating && !vol->changing_leb); - dbg_msg("start changing LEB %d:%d, %u bytes", + dbg_gen("start changing LEB %d:%d, %u bytes", vol->vol_id, req->lnum, req->bytes); if (req->bytes == 0) - return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0, - req->dtype); + return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0); vol->upd_bytes = req->bytes; vol->upd_received = 0; vol->changing_leb = 1; vol->ch_lnum = req->lnum; - vol->ch_dtype = req->dtype; vol->upd_buf = vmalloc(req->bytes); if (!vol->upd_buf) @@ -234,11 +233,11 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, memset(buf + len, 0xFF, l - len); len = ubi_calc_data_len(ubi, buf, l); if (len == 0) { - dbg_msg("all %d bytes contain 0xFF - skip", len); + dbg_gen("all %d bytes contain 0xFF - skip", len); return 0; } - err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, UBI_UNKNOWN); + err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len); } else { /* * When writing static volume, and this is the last logical @@ -250,8 +249,7 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, * contain zeros, not random trash. */ memset(buf + len, 0, vol->usable_leb_size - len); - err = ubi_eba_write_leb_st(ubi, vol, lnum, buf, len, - UBI_UNKNOWN, used_ebs); + err = ubi_eba_write_leb_st(ubi, vol, lnum, buf, len, used_ebs); } return err; @@ -259,6 +257,7 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, /** * ubi_more_update_data - write more update data. + * @ubi: UBI device description object * @vol: volume description object * @buf: write data (user-space memory buffer) * @count: how much bytes to write @@ -272,19 +271,20 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, const void __user *buf, int count) { - uint64_t tmp; +#ifndef __UBOOT__ int lnum, offs, err = 0, len, to_write = count; +#else + int lnum, err = 0, len, to_write = count; + u32 offs; +#endif - dbg_msg("write %d of %lld bytes, %lld already passed", + dbg_gen("write %d of %lld bytes, %lld already passed", count, vol->upd_bytes, vol->upd_received); if (ubi->ro_mode) return -EROFS; - tmp = vol->upd_received; - offs = do_div(tmp, vol->usable_leb_size); - lnum = tmp; - + lnum = div_u64_rem(vol->upd_received, vol->usable_leb_size, &offs); if (vol->upd_received + count > vol->upd_bytes) to_write = count = vol->upd_bytes - vol->upd_received; @@ -359,16 +359,16 @@ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, ubi_assert(vol->upd_received <= vol->upd_bytes); if (vol->upd_received == vol->upd_bytes) { + err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL); + if (err) + return err; /* The update is finished, clear the update marker */ err = clear_update_marker(ubi, vol, vol->upd_bytes); if (err) return err; - err = ubi_wl_flush(ubi); - if (err == 0) { - vol->updating = 0; - err = to_write; - vfree(vol->upd_buf); - } + vol->updating = 0; + err = to_write; + vfree(vol->upd_buf); } return err; @@ -376,6 +376,7 @@ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, /** * ubi_more_leb_change_data - accept more data for atomic LEB change. + * @ubi: UBI device description object * @vol: volume description object * @buf: write data (user-space memory buffer) * @count: how much bytes to write @@ -392,7 +393,7 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, { int err; - dbg_msg("write %d of %lld bytes, %lld already passed", + dbg_gen("write %d of %lld bytes, %lld already passed", count, vol->upd_bytes, vol->upd_received); if (ubi->ro_mode) @@ -410,10 +411,11 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, if (vol->upd_received == vol->upd_bytes) { int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size); - memset(vol->upd_buf + vol->upd_bytes, 0xFF, len - vol->upd_bytes); + memset(vol->upd_buf + vol->upd_bytes, 0xFF, + len - vol->upd_bytes); len = ubi_calc_data_len(ubi, vol->upd_buf, len); err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum, - vol->upd_buf, len, UBI_UNKNOWN); + vol->upd_buf, len); if (err) return err; } diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index c4e894b..d9665a4 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -11,21 +11,22 @@ * resizing. */ -#ifdef UBI_LINUX +#define __UBOOT__ +#ifndef __UBOOT__ #include <linux/err.h> -#include <asm/div64.h> +#include <linux/slab.h> +#include <linux/export.h> +#else +#include <div64.h> +#include <ubi_uboot.h> #endif +#include <linux/math64.h> -#include <ubi_uboot.h> #include "ubi.h" -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID -static void paranoid_check_volumes(struct ubi_device *ubi); -#else -#define paranoid_check_volumes(ubi) -#endif +static int self_check_volumes(struct ubi_device *ubi); -#ifdef UBI_LINUX +#ifndef __UBOOT__ static ssize_t vol_attribute_show(struct device *dev, struct device_attribute *attr, char *buf); @@ -121,10 +122,11 @@ static void vol_release(struct device *dev) { struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); + kfree(vol->eba_tbl); kfree(vol); } -#ifdef UBI_LINUX +#ifndef __UBOOT__ /** * volume_sysfs_init - initialize sysfs for new volume. * @ubi: UBI device description object @@ -193,14 +195,13 @@ static void volume_sysfs_close(struct ubi_volume *vol) * %UBI_VOL_NUM_AUTO, this function automatically assign ID to the new volume * and saves it in @req->vol_id. Returns zero in case of success and a negative * error code in case of failure. Note, the caller has to have the - * @ubi->volumes_mutex locked. + * @ubi->device_mutex locked. */ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) { - int i, err, vol_id = req->vol_id, dont_free = 0; + int i, err, vol_id = req->vol_id, do_free = 1; struct ubi_volume *vol; struct ubi_vtbl_record vtbl_rec; - uint64_t bytes; dev_t dev; if (ubi->ro_mode) @@ -213,7 +214,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) spin_lock(&ubi->volumes_lock); if (vol_id == UBI_VOL_NUM_AUTO) { /* Find unused volume ID */ - dbg_msg("search for vacant volume ID"); + dbg_gen("search for vacant volume ID"); for (i = 0; i < ubi->vtbl_slots; i++) if (!ubi->volumes[i]) { vol_id = i; @@ -221,21 +222,21 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) } if (vol_id == UBI_VOL_NUM_AUTO) { - dbg_err("out of volume IDs"); + ubi_err("out of volume IDs"); err = -ENFILE; goto out_unlock; } req->vol_id = vol_id; } - dbg_msg("volume ID %d, %llu bytes, type %d, name %s", - vol_id, (unsigned long long)req->bytes, + dbg_gen("create device %d, volume %d, %llu bytes, type %d, name %s", + ubi->ubi_num, vol_id, (unsigned long long)req->bytes, (int)req->vol_type, req->name); /* Ensure that this volume does not exist */ err = -EEXIST; if (ubi->volumes[vol_id]) { - dbg_err("volume %d already exists", vol_id); + ubi_err("volume %d already exists", vol_id); goto out_unlock; } @@ -244,20 +245,21 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) if (ubi->volumes[i] && ubi->volumes[i]->name_len == req->name_len && !strcmp(ubi->volumes[i]->name, req->name)) { - dbg_err("volume \"%s\" exists (ID %d)", req->name, i); + ubi_err("volume \"%s\" exists (ID %d)", req->name, i); goto out_unlock; } /* Calculate how many eraseblocks are requested */ vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; - bytes = req->bytes; - if (do_div(bytes, vol->usable_leb_size)) - vol->reserved_pebs = 1; - vol->reserved_pebs += bytes; + vol->reserved_pebs += div_u64(req->bytes + vol->usable_leb_size - 1, + vol->usable_leb_size); /* Reserve physical eraseblocks */ if (vol->reserved_pebs > ubi->avail_pebs) { - dbg_err("not enough PEBs, only %d available", ubi->avail_pebs); + ubi_err("not enough PEBs, only %d available", ubi->avail_pebs); + if (ubi->corr_peb_count) + ubi_err("%d PEBs are corrupted and not used", + ubi->corr_peb_count); err = -ENOSPC; goto out_unlock; } @@ -270,14 +272,14 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) vol->data_pad = ubi->leb_size % vol->alignment; vol->vol_type = req->vol_type; vol->name_len = req->name_len; - memcpy(vol->name, req->name, vol->name_len + 1); + memcpy(vol->name, req->name, vol->name_len); vol->ubi = ubi; /* * Finish all pending erases because there may be some LEBs belonging * to the same volume ID. */ - err = ubi_wl_flush(ubi); + err = ubi_wl_flush(ubi, vol_id, UBI_ALL); if (err) goto out_acc; @@ -296,10 +298,10 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) vol->used_bytes = (long long)vol->used_ebs * vol->usable_leb_size; } else { - bytes = vol->used_bytes; - vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size); - vol->used_ebs = bytes; - if (vol->last_eb_bytes) + vol->used_ebs = div_u64_rem(vol->used_bytes, + vol->usable_leb_size, + &vol->last_eb_bytes); + if (vol->last_eb_bytes != 0) vol->used_ebs += 1; else vol->last_eb_bytes = vol->usable_leb_size; @@ -315,20 +317,16 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) goto out_mapping; } - err = ubi_create_gluebi(ubi, vol); - if (err) - goto out_cdev; - vol->dev.release = vol_release; vol->dev.parent = &ubi->dev; vol->dev.devt = dev; vol->dev.class = ubi_class; - sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); + dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id); err = device_register(&vol->dev); if (err) { ubi_err("cannot register device"); - goto out_gluebi; + goto out_cdev; } err = volume_sysfs_init(ubi, vol); @@ -345,7 +343,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) vtbl_rec.vol_type = UBI_VID_DYNAMIC; else vtbl_rec.vol_type = UBI_VID_STATIC; - memcpy(vtbl_rec.name, vol->name, vol->name_len + 1); + memcpy(vtbl_rec.name, vol->name, vol->name_len); err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) @@ -356,39 +354,37 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) ubi->vol_count += 1; spin_unlock(&ubi->volumes_lock); - paranoid_check_volumes(ubi); - return 0; + ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED); + self_check_volumes(ubi); + return err; out_sysfs: /* - * We have registered our device, we should not free the volume* + * We have registered our device, we should not free the volume * description object in this function in case of an error - it is * freed by the release function. * * Get device reference to prevent the release function from being * called just after sysfs has been closed. */ - dont_free = 1; + do_free = 0; get_device(&vol->dev); volume_sysfs_close(vol); -out_gluebi: - if (ubi_destroy_gluebi(vol)) - dbg_err("cannot destroy gluebi for volume %d:%d", - ubi->ubi_num, vol_id); out_cdev: cdev_del(&vol->cdev); out_mapping: - kfree(vol->eba_tbl); + if (do_free) + kfree(vol->eba_tbl); out_acc: spin_lock(&ubi->volumes_lock); ubi->rsvd_pebs -= vol->reserved_pebs; ubi->avail_pebs += vol->reserved_pebs; out_unlock: spin_unlock(&ubi->volumes_lock); - if (dont_free) - put_device(&vol->dev); - else + if (do_free) kfree(vol); + else + put_device(&vol->dev); ubi_err("cannot create volume %d, error %d", vol_id, err); return err; } @@ -396,19 +392,20 @@ out_unlock: /** * ubi_remove_volume - remove volume. * @desc: volume descriptor + * @no_vtbl: do not change volume table if not zero * * This function removes volume described by @desc. The volume has to be opened * in "exclusive" mode. Returns zero in case of success and a negative error - * code in case of failure. The caller has to have the @ubi->volumes_mutex + * code in case of failure. The caller has to have the @ubi->device_mutex * locked. */ -int ubi_remove_volume(struct ubi_volume_desc *desc) +int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl) { struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; - dbg_msg("remove UBI volume %d", vol_id); + dbg_gen("remove device %d, volume %d", ubi->ubi_num, vol_id); ubi_assert(desc->mode == UBI_EXCLUSIVE); ubi_assert(vol == ubi->volumes[vol_id]); @@ -427,13 +424,11 @@ int ubi_remove_volume(struct ubi_volume_desc *desc) ubi->volumes[vol_id] = NULL; spin_unlock(&ubi->volumes_lock); - err = ubi_destroy_gluebi(vol); - if (err) - goto out_err; - - err = ubi_change_vtbl_record(ubi, vol_id, NULL); - if (err) - goto out_err; + if (!no_vtbl) { + err = ubi_change_vtbl_record(ubi, vol_id, NULL); + if (err) + goto out_err; + } for (i = 0; i < vol->reserved_pebs; i++) { err = ubi_eba_unmap_leb(ubi, vol, i); @@ -441,28 +436,21 @@ int ubi_remove_volume(struct ubi_volume_desc *desc) goto out_err; } - kfree(vol->eba_tbl); - vol->eba_tbl = NULL; cdev_del(&vol->cdev); volume_sysfs_close(vol); spin_lock(&ubi->volumes_lock); ubi->rsvd_pebs -= reserved_pebs; ubi->avail_pebs += reserved_pebs; - i = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; - if (i > 0) { - i = ubi->avail_pebs >= i ? i : ubi->avail_pebs; - ubi->avail_pebs -= i; - ubi->rsvd_pebs += i; - ubi->beb_rsvd_pebs += i; - if (i > 0) - ubi_msg("reserve more %d PEBs", i); - } + ubi_update_reserved(ubi); ubi->vol_count -= 1; spin_unlock(&ubi->volumes_lock); - paranoid_check_volumes(ubi); - return 0; + ubi_volume_notify(ubi, vol, UBI_VOLUME_REMOVED); + if (!no_vtbl) + self_check_volumes(ubi); + + return err; out_err: ubi_err("cannot remove volume %d, error %d", vol_id, err); @@ -480,7 +468,7 @@ out_unlock: * * This function re-sizes the volume and returns zero in case of success, and a * negative error code in case of failure. The caller has to have the - * @ubi->volumes_mutex locked. + * @ubi->device_mutex locked. */ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) { @@ -493,12 +481,12 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) if (ubi->ro_mode) return -EROFS; - dbg_msg("re-size volume %d to from %d to %d PEBs", - vol_id, vol->reserved_pebs, reserved_pebs); + dbg_gen("re-size device %d, volume %d to from %d to %d PEBs", + ubi->ubi_num, vol_id, vol->reserved_pebs, reserved_pebs); if (vol->vol_type == UBI_STATIC_VOLUME && reserved_pebs < vol->used_ebs) { - dbg_err("too small size %d, %d LEBs contain data", + ubi_err("too small size %d, %d LEBs contain data", reserved_pebs, vol->used_ebs); return -EINVAL; } @@ -527,8 +515,11 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) if (pebs > 0) { spin_lock(&ubi->volumes_lock); if (pebs > ubi->avail_pebs) { - dbg_err("not enough PEBs: requested %d, available %d", + ubi_err("not enough PEBs: requested %d, available %d", pebs, ubi->avail_pebs); + if (ubi->corr_peb_count) + ubi_err("%d PEBs are corrupted and not used", + ubi->corr_peb_count); spin_unlock(&ubi->volumes_lock); err = -ENOSPC; goto out_free; @@ -543,7 +534,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) } /* Change volume table record */ - memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol_id]; vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) @@ -558,15 +549,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) spin_lock(&ubi->volumes_lock); ubi->rsvd_pebs += pebs; ubi->avail_pebs -= pebs; - pebs = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; - if (pebs > 0) { - pebs = ubi->avail_pebs >= pebs ? pebs : ubi->avail_pebs; - ubi->avail_pebs -= pebs; - ubi->rsvd_pebs += pebs; - ubi->beb_rsvd_pebs += pebs; - if (pebs > 0) - ubi_msg("reserve more %d PEBs", pebs); - } + ubi_update_reserved(ubi); for (i = 0; i < reserved_pebs; i++) new_mapping[i] = vol->eba_tbl[i]; kfree(vol->eba_tbl); @@ -582,8 +565,9 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) (long long)vol->used_ebs * vol->usable_leb_size; } - paranoid_check_volumes(ubi); - return 0; + ubi_volume_notify(ubi, vol, UBI_VOLUME_RESIZED); + self_check_volumes(ubi); + return err; out_acc: if (pebs > 0) { @@ -598,6 +582,45 @@ out_free: } /** + * ubi_rename_volumes - re-name UBI volumes. + * @ubi: UBI device description object + * @rename_list: list of &struct ubi_rename_entry objects + * + * This function re-names or removes volumes specified in the re-name list. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list) +{ + int err; + struct ubi_rename_entry *re; + + err = ubi_vtbl_rename_volumes(ubi, rename_list); + if (err) + return err; + + list_for_each_entry(re, rename_list, list) { + if (re->remove) { + err = ubi_remove_volume(re->desc, 1); + if (err) + break; + } else { + struct ubi_volume *vol = re->desc->vol; + + spin_lock(&ubi->volumes_lock); + vol->name_len = re->new_name_len; + memcpy(vol->name, re->new_name, re->new_name_len + 1); + spin_unlock(&ubi->volumes_lock); + ubi_volume_notify(ubi, vol, UBI_VOLUME_RENAMED); + } + } + + if (!err) + self_check_volumes(ubi); + return err; +} + +/** * ubi_add_volume - add volume. * @ubi: UBI device description object * @vol: volume description object @@ -611,8 +634,7 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol) int err, vol_id = vol->vol_id; dev_t dev; - dbg_msg("add volume %d", vol_id); - ubi_dbg_dump_vol_info(vol); + dbg_gen("add volume %d", vol_id); /* Register character device for the volume */ cdev_init(&vol->cdev, &ubi_vol_cdev_operations); @@ -625,32 +647,25 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol) return err; } - err = ubi_create_gluebi(ubi, vol); - if (err) - goto out_cdev; - vol->dev.release = vol_release; vol->dev.parent = &ubi->dev; vol->dev.devt = dev; vol->dev.class = ubi_class; - sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); + dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id); err = device_register(&vol->dev); if (err) - goto out_gluebi; + goto out_cdev; err = volume_sysfs_init(ubi, vol); if (err) { cdev_del(&vol->cdev); - err = ubi_destroy_gluebi(vol); volume_sysfs_close(vol); return err; } - paranoid_check_volumes(ubi); - return 0; + self_check_volumes(ubi); + return err; -out_gluebi: - err = ubi_destroy_gluebi(vol); out_cdev: cdev_del(&vol->cdev); return err; @@ -666,22 +681,21 @@ out_cdev: */ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol) { - dbg_msg("free volume %d", vol->vol_id); + dbg_gen("free volume %d", vol->vol_id); ubi->volumes[vol->vol_id] = NULL; - ubi_destroy_gluebi(vol); cdev_del(&vol->cdev); volume_sysfs_close(vol); } -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID - /** - * paranoid_check_volume - check volume information. + * self_check_volume - check volume information. * @ubi: UBI device description object * @vol_id: volume ID + * + * Returns zero if volume is all right and a a negative error code if not. */ -static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) +static int self_check_volume(struct ubi_device *ubi, int vol_id) { int idx = vol_id2idx(ubi, vol_id); int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; @@ -699,16 +713,7 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) goto fail; } spin_unlock(&ubi->volumes_lock); - return; - } - - if (vol->exclusive) { - /* - * The volume may be being created at the moment, do not check - * it (e.g., it may be in the middle of ubi_create_volume(). - */ - spin_unlock(&ubi->volumes_lock); - return; + return 0; } if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 || @@ -740,7 +745,7 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) } if (vol->upd_marker && vol->corrupted) { - dbg_err("update marker and corrupted simultaneously"); + ubi_err("update marker and corrupted simultaneously"); goto fail; } @@ -760,11 +765,6 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) goto fail; } - if (!vol->name) { - ubi_err("NULL volume name"); - goto fail; - } - n = strnlen(vol->name, vol->name_len + 1); if (n != vol->name_len) { ubi_err("bad name_len %lld", n); @@ -818,31 +818,42 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) if (alignment != vol->alignment || data_pad != vol->data_pad || upd_marker != vol->upd_marker || vol_type != vol->vol_type || - name_len!= vol->name_len || strncmp(name, vol->name, name_len)) { + name_len != vol->name_len || strncmp(name, vol->name, name_len)) { ubi_err("volume info is different"); goto fail; } spin_unlock(&ubi->volumes_lock); - return; + return 0; fail: - ubi_err("paranoid check failed for volume %d", vol_id); - ubi_dbg_dump_vol_info(vol); - ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); + ubi_err("self-check failed for volume %d", vol_id); + if (vol) + ubi_dump_vol_info(vol); + ubi_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); + dump_stack(); spin_unlock(&ubi->volumes_lock); - BUG(); + return -EINVAL; } /** - * paranoid_check_volumes - check information about all volumes. + * self_check_volumes - check information about all volumes. * @ubi: UBI device description object + * + * Returns zero if volumes are all right and a a negative error code if not. */ -static void paranoid_check_volumes(struct ubi_device *ubi) +static int self_check_volumes(struct ubi_device *ubi) { - int i; + int i, err = 0; - for (i = 0; i < ubi->vtbl_slots; i++) - paranoid_check_volume(ubi, i); + if (!ubi_dbg_chk_gen(ubi)) + return 0; + + for (i = 0; i < ubi->vtbl_slots; i++) { + err = self_check_volume(ubi, i); + if (err) + break; + } + + return err; } -#endif diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 3fbb4a0..e6c8f5b 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -25,16 +25,15 @@ * LEB 1. This scheme guarantees recoverability from unclean reboots. * * In this UBI implementation the on-flash volume table does not contain any - * information about how many data static volumes contain. This information may - * be found from the scanning data. + * information about how much data static volumes contain. * * But it would still be beneficial to store this information in the volume * table. For example, suppose we have a static volume X, and all its physical * eraseblocks became bad for some reasons. Suppose we are attaching the - * corresponding MTD device, the scanning has found no logical eraseblocks + * corresponding MTD device, for some reason we find no logical eraseblocks * corresponding to the volume X. According to the volume table volume X does * exist. So we don't know whether it is just empty or all its physical - * eraseblocks went bad. So we cannot alarm the user about this corruption. + * eraseblocks went bad. So we cannot alarm the user properly. * * The volume table also stores so-called "update marker", which is used for * volume updates. Before updating the volume, the update marker is set, and @@ -44,20 +43,20 @@ * damaged. */ -#ifdef UBI_LINUX +#define __UBOOT__ +#ifndef __UBOOT__ #include <linux/crc32.h> #include <linux/err.h> +#include <linux/slab.h> #include <asm/div64.h> +#else +#include <ubi_uboot.h> #endif -#include <ubi_uboot.h> +#include <linux/err.h> #include "ubi.h" -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID -static void paranoid_vtbl_check(const struct ubi_device *ubi); -#else -#define paranoid_vtbl_check(ubi) -#endif +static void self_vtbl_check(const struct ubi_device *ubi); /* Empty volume table record */ static struct ubi_vtbl_record empty_vtbl_record; @@ -97,18 +96,68 @@ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, return err; err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0, - ubi->vtbl_size, UBI_LONGTERM); + ubi->vtbl_size); + if (err) + return err; + } + + self_vtbl_check(ubi); + return 0; +} + +/** + * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table. + * @ubi: UBI device description object + * @rename_list: list of &struct ubi_rename_entry objects + * + * This function re-names multiple volumes specified in @req in the volume + * table. Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubi_vtbl_rename_volumes(struct ubi_device *ubi, + struct list_head *rename_list) +{ + int i, err; + struct ubi_rename_entry *re; + struct ubi_volume *layout_vol; + + list_for_each_entry(re, rename_list, list) { + uint32_t crc; + struct ubi_volume *vol = re->desc->vol; + struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id]; + + if (re->remove) { + memcpy(vtbl_rec, &empty_vtbl_record, + sizeof(struct ubi_vtbl_record)); + continue; + } + + vtbl_rec->name_len = cpu_to_be16(re->new_name_len); + memcpy(vtbl_rec->name, re->new_name, re->new_name_len); + memset(vtbl_rec->name + re->new_name_len, 0, + UBI_VOL_NAME_MAX + 1 - re->new_name_len); + crc = crc32(UBI_CRC32_INIT, vtbl_rec, + UBI_VTBL_RECORD_SIZE_CRC); + vtbl_rec->crc = cpu_to_be32(crc); + } + + layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + err = ubi_eba_unmap_leb(ubi, layout_vol, i); + if (err) + return err; + + err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0, + ubi->vtbl_size); if (err) return err; } - paranoid_vtbl_check(ubi); return 0; } /** - * vtbl_check - check if volume table is not corrupted and contains sensible - * data. + * vtbl_check - check if volume table is not corrupted and sensible. * @ubi: UBI device description object * @vtbl: volume table * @@ -132,13 +181,13 @@ static int vtbl_check(const struct ubi_device *ubi, upd_marker = vtbl[i].upd_marker; vol_type = vtbl[i].vol_type; name_len = be16_to_cpu(vtbl[i].name_len); - name = (const char *) &vtbl[i].name[0]; + name = &vtbl[i].name[0]; crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC); if (be32_to_cpu(vtbl[i].crc) != crc) { ubi_err("bad CRC at record %u: %#08x, not %#08x", i, crc, be32_to_cpu(vtbl[i].crc)); - ubi_dbg_dump_vtbl_record(&vtbl[i], i); + ubi_dump_vtbl_record(&vtbl[i], i); return 1; } @@ -170,7 +219,7 @@ static int vtbl_check(const struct ubi_device *ubi, n = ubi->leb_size % alignment; if (data_pad != n) { - dbg_err("bad data_pad, has to be %d", n); + ubi_err("bad data_pad, has to be %d", n); err = 6; goto bad; } @@ -186,8 +235,8 @@ static int vtbl_check(const struct ubi_device *ubi, } if (reserved_pebs > ubi->good_peb_count) { - dbg_err("too large reserved_pebs, good PEBs %d", - ubi->good_peb_count); + ubi_err("too large reserved_pebs %d, good PEBs %d", + reserved_pebs, ubi->good_peb_count); err = 9; goto bad; } @@ -215,11 +264,15 @@ static int vtbl_check(const struct ubi_device *ubi, int len2 = be16_to_cpu(vtbl[n].name_len); if (len1 > 0 && len1 == len2 && - !strncmp((char *)vtbl[i].name, (char *)vtbl[n].name, len1)) { - ubi_err("volumes %d and %d have the same name" - " \"%s\"", i, n, vtbl[i].name); - ubi_dbg_dump_vtbl_record(&vtbl[i], i); - ubi_dbg_dump_vtbl_record(&vtbl[n], n); +#ifndef __UBOOT__ + !strncmp(vtbl[i].name, vtbl[n].name, len1)) { +#else + !strncmp((char *)vtbl[i].name, vtbl[n].name, len1)) { +#endif + ubi_err("volumes %d and %d have the same name \"%s\"", + i, n, vtbl[i].name); + ubi_dump_vtbl_record(&vtbl[i], i); + ubi_dump_vtbl_record(&vtbl[n], n); return -EINVAL; } } @@ -229,76 +282,64 @@ static int vtbl_check(const struct ubi_device *ubi, bad: ubi_err("volume table check failed: record %d, error %d", i, err); - ubi_dbg_dump_vtbl_record(&vtbl[i], i); + ubi_dump_vtbl_record(&vtbl[i], i); return -EINVAL; } /** * create_vtbl - create a copy of volume table. * @ubi: UBI device description object - * @si: scanning information + * @ai: attaching information * @copy: number of the volume table copy * @vtbl: contents of the volume table * * This function returns zero in case of success and a negative error code in * case of failure. */ -static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, +static int create_vtbl(struct ubi_device *ubi, struct ubi_attach_info *ai, int copy, void *vtbl) { int err, tries = 0; - static struct ubi_vid_hdr *vid_hdr; - struct ubi_scan_volume *sv; - struct ubi_scan_leb *new_seb, *old_seb = NULL; + struct ubi_vid_hdr *vid_hdr; + struct ubi_ainf_peb *new_aeb; - ubi_msg("create volume table (copy #%d)", copy + 1); + dbg_gen("create volume table (copy #%d)", copy + 1); vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); if (!vid_hdr) return -ENOMEM; - /* - * Check if there is a logical eraseblock which would have to contain - * this volume table copy was found during scanning. It has to be wiped - * out. - */ - sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); - if (sv) - old_seb = ubi_scan_find_seb(sv, copy); - retry: - new_seb = ubi_scan_get_free_peb(ubi, si); - if (IS_ERR(new_seb)) { - err = PTR_ERR(new_seb); + new_aeb = ubi_early_get_peb(ubi, ai); + if (IS_ERR(new_aeb)) { + err = PTR_ERR(new_aeb); goto out_free; } - vid_hdr->vol_type = UBI_VID_DYNAMIC; + vid_hdr->vol_type = UBI_LAYOUT_VOLUME_TYPE; vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOLUME_ID); vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; vid_hdr->data_size = vid_hdr->used_ebs = vid_hdr->data_pad = cpu_to_be32(0); vid_hdr->lnum = cpu_to_be32(copy); - vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum); - vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0); + vid_hdr->sqnum = cpu_to_be64(++ai->max_sqnum); /* The EC header is already there, write the VID header */ - err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); + err = ubi_io_write_vid_hdr(ubi, new_aeb->pnum, vid_hdr); if (err) goto write_error; /* Write the layout volume contents */ - err = ubi_io_write_data(ubi, vtbl, new_seb->pnum, 0, ubi->vtbl_size); + err = ubi_io_write_data(ubi, vtbl, new_aeb->pnum, 0, ubi->vtbl_size); if (err) goto write_error; /* - * And add it to the scanning information. Don't delete the old - * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'. + * And add it to the attaching information. Don't delete the old version + * of this LEB as it will be deleted and freed in 'ubi_add_to_av()'. */ - err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec, - vid_hdr, 0); - kfree(new_seb); + err = ubi_add_to_av(ubi, ai, new_aeb->pnum, new_aeb->ec, vid_hdr, 0); + kmem_cache_free(ai->aeb_slab_cache, new_aeb); ubi_free_vid_hdr(ubi, vid_hdr); return err; @@ -308,10 +349,10 @@ write_error: * Probably this physical eraseblock went bad, try to pick * another one. */ - list_add_tail(&new_seb->u.list, &si->corr); + list_add(&new_aeb->u.list, &ai->erase); goto retry; } - kfree(new_seb); + kmem_cache_free(ai->aeb_slab_cache, new_aeb); out_free: ubi_free_vid_hdr(ubi, vid_hdr); return err; @@ -321,20 +362,20 @@ out_free: /** * process_lvol - process the layout volume. * @ubi: UBI device description object - * @si: scanning information - * @sv: layout volume scanning information + * @ai: attaching information + * @av: layout volume attaching information * * This function is responsible for reading the layout volume, ensuring it is * not corrupted, and recovering from corruptions if needed. Returns volume * table in case of success and a negative error code in case of failure. */ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, - struct ubi_scan_info *si, - struct ubi_scan_volume *sv) + struct ubi_attach_info *ai, + struct ubi_ainf_volume *av) { int err; struct rb_node *rb; - struct ubi_scan_leb *seb; + struct ubi_ainf_peb *aeb; struct ubi_vtbl_record *leb[UBI_LAYOUT_VOLUME_EBS] = { NULL, NULL }; int leb_corrupted[UBI_LAYOUT_VOLUME_EBS] = {1, 1}; @@ -356,25 +397,24 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, * 0 contains more recent information. * * So the plan is to first check LEB 0. Then - * a. if LEB 0 is OK, it must be containing the most resent data; then + * a. if LEB 0 is OK, it must be containing the most recent data; then * we compare it with LEB 1, and if they are different, we copy LEB * 0 to LEB 1; * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1 * to LEB 0. */ - dbg_msg("check layout volume"); + dbg_gen("check layout volume"); /* Read both LEB 0 and LEB 1 into memory */ - ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { - leb[seb->lnum] = vmalloc(ubi->vtbl_size); - if (!leb[seb->lnum]) { + ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) { + leb[aeb->lnum] = vzalloc(ubi->vtbl_size); + if (!leb[aeb->lnum]) { err = -ENOMEM; goto out_free; } - memset(leb[seb->lnum], 0, ubi->vtbl_size); - err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, + err = ubi_io_read_data(ubi, leb[aeb->lnum], aeb->pnum, 0, ubi->vtbl_size); if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) /* @@ -382,12 +422,12 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, * uncorrectable ECC error, but we have our own CRC and * the data will be checked later. If the data is OK, * the PEB will be scrubbed (because we set - * seb->scrub). If the data is not OK, the contents of + * aeb->scrub). If the data is not OK, the contents of * the PEB will be recovered from the second copy, and - * seb->scrub will be cleared in - * 'ubi_scan_add_used()'. + * aeb->scrub will be cleared in + * 'ubi_add_to_av()'. */ - seb->scrub = 1; + aeb->scrub = 1; else if (err) goto out_free; } @@ -402,10 +442,11 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, if (!leb_corrupted[0]) { /* LEB 0 is OK */ if (leb[1]) - leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size); + leb_corrupted[1] = memcmp(leb[0], leb[1], + ubi->vtbl_size); if (leb_corrupted[1]) { ubi_warn("volume table copy #2 is corrupted"); - err = create_vtbl(ubi, si, 1, leb[0]); + err = create_vtbl(ubi, ai, 1, leb[0]); if (err) goto out_free; ubi_msg("volume table was restored"); @@ -428,7 +469,7 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, } ubi_warn("volume table copy #1 is corrupted"); - err = create_vtbl(ubi, si, 0, leb[1]); + err = create_vtbl(ubi, ai, 0, leb[1]); if (err) goto out_free; ubi_msg("volume table was restored"); @@ -446,21 +487,20 @@ out_free: /** * create_empty_lvol - create empty layout volume. * @ubi: UBI device description object - * @si: scanning information + * @ai: attaching information * * This function returns volume table contents in case of success and a * negative error code in case of failure. */ static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi, - struct ubi_scan_info *si) + struct ubi_attach_info *ai) { int i; struct ubi_vtbl_record *vtbl; - vtbl = vmalloc(ubi->vtbl_size); + vtbl = vzalloc(ubi->vtbl_size); if (!vtbl) return ERR_PTR(-ENOMEM); - memset(vtbl, 0, ubi->vtbl_size); for (i = 0; i < ubi->vtbl_slots; i++) memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE); @@ -468,7 +508,7 @@ static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi, for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { int err; - err = create_vtbl(ubi, si, i, vtbl); + err = create_vtbl(ubi, ai, i, vtbl); if (err) { vfree(vtbl); return ERR_PTR(err); @@ -481,18 +521,19 @@ static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi, /** * init_volumes - initialize volume information for existing volumes. * @ubi: UBI device description object - * @si: scanning information + * @ai: scanning information * @vtbl: volume table * * This function allocates volume description objects for existing volumes. * Returns zero in case of success and a negative error code in case of * failure. */ -static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, +static int init_volumes(struct ubi_device *ubi, + const struct ubi_attach_info *ai, const struct ubi_vtbl_record *vtbl) { int i, reserved_pebs = 0; - struct ubi_scan_volume *sv; + struct ubi_ainf_volume *av; struct ubi_volume *vol; for (i = 0; i < ubi->vtbl_slots; i++) { @@ -520,8 +561,8 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) { /* Auto re-size flag may be set only for one volume */ if (ubi->autoresize_vol_id != -1) { - ubi_err("more then one auto-resize volume (%d " - "and %d)", ubi->autoresize_vol_id, i); + ubi_err("more than one auto-resize volume (%d and %d)", + ubi->autoresize_vol_id, i); kfree(vol); return -EINVAL; } @@ -548,8 +589,8 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, } /* Static volumes only */ - sv = ubi_scan_find_sv(si, i); - if (!sv) { + av = ubi_find_av(ai, i); + if (!av) { /* * No eraseblocks belonging to this volume found. We * don't actually know whether this static volume is @@ -561,22 +602,22 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, continue; } - if (sv->leb_count != sv->used_ebs) { + if (av->leb_count != av->used_ebs) { /* * We found a static volume which misses several * eraseblocks. Treat it as corrupted. */ ubi_warn("static volume %d misses %d LEBs - corrupted", - sv->vol_id, sv->used_ebs - sv->leb_count); + av->vol_id, av->used_ebs - av->leb_count); vol->corrupted = 1; continue; } - vol->used_ebs = sv->used_ebs; + vol->used_ebs = av->used_ebs; vol->used_bytes = (long long)(vol->used_ebs - 1) * vol->usable_leb_size; - vol->used_bytes += sv->last_data_size; - vol->last_eb_bytes = sv->last_data_size; + vol->used_bytes += av->last_data_size; + vol->last_eb_bytes = av->last_data_size; } /* And add the layout volume */ @@ -585,7 +626,7 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, return -ENOMEM; vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS; - vol->alignment = 1; + vol->alignment = UBI_LAYOUT_VOLUME_ALIGN; vol->vol_type = UBI_DYNAMIC_VOLUME; vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1; memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1); @@ -603,9 +644,13 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, ubi->vol_count += 1; vol->ubi = ubi; - if (reserved_pebs > ubi->avail_pebs) + if (reserved_pebs > ubi->avail_pebs) { ubi_err("not enough PEBs, required %d, available %d", reserved_pebs, ubi->avail_pebs); + if (ubi->corr_peb_count) + ubi_err("%d PEBs are corrupted and not used", + ubi->corr_peb_count); + } ubi->rsvd_pebs += reserved_pebs; ubi->avail_pebs -= reserved_pebs; @@ -613,105 +658,104 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, } /** - * check_sv - check volume scanning information. + * check_av - check volume attaching information. * @vol: UBI volume description object - * @sv: volume scanning information + * @av: volume attaching information * - * This function returns zero if the volume scanning information is consistent + * This function returns zero if the volume attaching information is consistent * to the data read from the volume tabla, and %-EINVAL if not. */ -static int check_sv(const struct ubi_volume *vol, - const struct ubi_scan_volume *sv) +static int check_av(const struct ubi_volume *vol, + const struct ubi_ainf_volume *av) { int err; - if (sv->highest_lnum >= vol->reserved_pebs) { + if (av->highest_lnum >= vol->reserved_pebs) { err = 1; goto bad; } - if (sv->leb_count > vol->reserved_pebs) { + if (av->leb_count > vol->reserved_pebs) { err = 2; goto bad; } - if (sv->vol_type != vol->vol_type) { + if (av->vol_type != vol->vol_type) { err = 3; goto bad; } - if (sv->used_ebs > vol->reserved_pebs) { + if (av->used_ebs > vol->reserved_pebs) { err = 4; goto bad; } - if (sv->data_pad != vol->data_pad) { + if (av->data_pad != vol->data_pad) { err = 5; goto bad; } return 0; bad: - ubi_err("bad scanning information, error %d", err); - ubi_dbg_dump_sv(sv); - ubi_dbg_dump_vol_info(vol); + ubi_err("bad attaching information, error %d", err); + ubi_dump_av(av); + ubi_dump_vol_info(vol); return -EINVAL; } /** - * check_scanning_info - check that scanning information. + * check_attaching_info - check that attaching information. * @ubi: UBI device description object - * @si: scanning information + * @ai: attaching information * * Even though we protect on-flash data by CRC checksums, we still don't trust - * the media. This function ensures that scanning information is consistent to - * the information read from the volume table. Returns zero if the scanning + * the media. This function ensures that attaching information is consistent to + * the information read from the volume table. Returns zero if the attaching * information is OK and %-EINVAL if it is not. */ -static int check_scanning_info(const struct ubi_device *ubi, - struct ubi_scan_info *si) +static int check_attaching_info(const struct ubi_device *ubi, + struct ubi_attach_info *ai) { int err, i; - struct ubi_scan_volume *sv; + struct ubi_ainf_volume *av; struct ubi_volume *vol; - if (si->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) { - ubi_err("scanning found %d volumes, maximum is %d + %d", - si->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots); + if (ai->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) { + ubi_err("found %d volumes while attaching, maximum is %d + %d", + ai->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots); return -EINVAL; } - if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT && - si->highest_vol_id < UBI_INTERNAL_VOL_START) { - ubi_err("too large volume ID %d found by scanning", - si->highest_vol_id); + if (ai->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT && + ai->highest_vol_id < UBI_INTERNAL_VOL_START) { + ubi_err("too large volume ID %d found", ai->highest_vol_id); return -EINVAL; } for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { cond_resched(); - sv = ubi_scan_find_sv(si, i); + av = ubi_find_av(ai, i); vol = ubi->volumes[i]; if (!vol) { - if (sv) - ubi_scan_rm_volume(si, sv); + if (av) + ubi_remove_av(ai, av); continue; } if (vol->reserved_pebs == 0) { ubi_assert(i < ubi->vtbl_slots); - if (!sv) + if (!av) continue; /* - * During scanning we found a volume which does not + * During attaching we found a volume which does not * exist according to the information in the volume * table. This must have happened due to an unclean * reboot while the volume was being removed. Discard * these eraseblocks. */ - ubi_msg("finish volume %d removal", sv->vol_id); - ubi_scan_rm_volume(si, sv); - } else if (sv) { - err = check_sv(vol, sv); + ubi_msg("finish volume %d removal", av->vol_id); + ubi_remove_av(ai, av); + } else if (av) { + err = check_av(vol, av); if (err) return err; } @@ -721,19 +765,18 @@ static int check_scanning_info(const struct ubi_device *ubi, } /** - * ubi_read_volume_table - read volume table. - * information. + * ubi_read_volume_table - read the volume table. * @ubi: UBI device description object - * @si: scanning information + * @ai: attaching information * * This function reads volume table, checks it, recover from errors if needed, * or creates it if needed. Returns zero in case of success and a negative * error code in case of failure. */ -int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) +int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai) { int i, err; - struct ubi_scan_volume *sv; + struct ubi_ainf_volume *av; empty_vtbl_record.crc = cpu_to_be32(0xf116c36b); @@ -748,8 +791,8 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE; ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size); - sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); - if (!sv) { + av = ubi_find_av(ai, UBI_LAYOUT_VOLUME_ID); + if (!av) { /* * No logical eraseblocks belonging to the layout volume were * found. This could mean that the flash is just empty. In @@ -758,8 +801,8 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) * But if flash is not empty this must be a corruption or the * MTD device just contains garbage. */ - if (si->is_empty) { - ubi->vtbl = create_empty_lvol(ubi, si); + if (ai->is_empty) { + ubi->vtbl = create_empty_lvol(ubi, ai); if (IS_ERR(ubi->vtbl)) return PTR_ERR(ubi->vtbl); } else { @@ -767,33 +810,33 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) return -EINVAL; } } else { - if (sv->leb_count > UBI_LAYOUT_VOLUME_EBS) { + if (av->leb_count > UBI_LAYOUT_VOLUME_EBS) { /* This must not happen with proper UBI images */ - dbg_err("too many LEBs (%d) in layout volume", - sv->leb_count); + ubi_err("too many LEBs (%d) in layout volume", + av->leb_count); return -EINVAL; } - ubi->vtbl = process_lvol(ubi, si, sv); + ubi->vtbl = process_lvol(ubi, ai, av); if (IS_ERR(ubi->vtbl)) return PTR_ERR(ubi->vtbl); } - ubi->avail_pebs = ubi->good_peb_count; + ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count; /* * The layout volume is OK, initialize the corresponding in-RAM data * structures. */ - err = init_volumes(ubi, si, ubi->vtbl); + err = init_volumes(ubi, ai, ubi->vtbl); if (err) goto out_free; /* - * Get sure that the scanning information is consistent to the + * Make sure that the attaching information is consistent to the * information stored in the volume table. */ - err = check_scanning_info(ubi, si); + err = check_attaching_info(ubi, ai); if (err) goto out_free; @@ -801,26 +844,24 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) out_free: vfree(ubi->vtbl); - for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) - if (ubi->volumes[i]) { - kfree(ubi->volumes[i]); - ubi->volumes[i] = NULL; - } + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + kfree(ubi->volumes[i]); + ubi->volumes[i] = NULL; + } return err; } -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID - /** - * paranoid_vtbl_check - check volume table. + * self_vtbl_check - check volume table. * @ubi: UBI device description object */ -static void paranoid_vtbl_check(const struct ubi_device *ubi) +static void self_vtbl_check(const struct ubi_device *ubi) { + if (!ubi_dbg_chk_gen(ubi)) + return; + if (vtbl_check(ubi, ubi->vtbl)) { - ubi_err("paranoid check failed"); + ubi_err("self-check failed"); BUG(); } } - -#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 1eaa88b..2987ffc 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -7,97 +7,117 @@ */ /* - * UBI wear-leveling unit. + * UBI wear-leveling sub-system. * - * This unit is responsible for wear-leveling. It works in terms of physical - * eraseblocks and erase counters and knows nothing about logical eraseblocks, - * volumes, etc. From this unit's perspective all physical eraseblocks are of - * two types - used and free. Used physical eraseblocks are those that were - * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are - * those that were put by the 'ubi_wl_put_peb()' function. + * This sub-system is responsible for wear-leveling. It works in terms of + * physical eraseblocks and erase counters and knows nothing about logical + * eraseblocks, volumes, etc. From this sub-system's perspective all physical + * eraseblocks are of two types - used and free. Used physical eraseblocks are + * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical + * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function. * * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter - * header. The rest of the physical eraseblock contains only 0xFF bytes. + * header. The rest of the physical eraseblock contains only %0xFF bytes. * - * When physical eraseblocks are returned to the WL unit by means of the + * When physical eraseblocks are returned to the WL sub-system by means of the * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is * done asynchronously in context of the per-UBI device background thread, - * which is also managed by the WL unit. + * which is also managed by the WL sub-system. * * The wear-leveling is ensured by means of moving the contents of used * physical eraseblocks with low erase counter to free physical eraseblocks * with high erase counter. * - * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick - * an "optimal" physical eraseblock. For example, when it is known that the - * physical eraseblock will be "put" soon because it contains short-term data, - * the WL unit may pick a free physical eraseblock with low erase counter, and - * so forth. + * If the WL sub-system fails to erase a physical eraseblock, it marks it as + * bad. * - * If the WL unit fails to erase a physical eraseblock, it marks it as bad. + * This sub-system is also responsible for scrubbing. If a bit-flip is detected + * in a physical eraseblock, it has to be moved. Technically this is the same + * as moving it for wear-leveling reasons. * - * This unit is also responsible for scrubbing. If a bit-flip is detected in a - * physical eraseblock, it has to be moved. Technically this is the same as - * moving it for wear-leveling reasons. + * As it was said, for the UBI sub-system all physical eraseblocks are either + * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while + * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub + * RB-trees, as well as (temporarily) in the @wl->pq queue. * - * As it was said, for the UBI unit all physical eraseblocks are either "free" - * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used - * eraseblocks are kept in a set of different RB-trees: @wl->used, - * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. + * When the WL sub-system returns a physical eraseblock, the physical + * eraseblock is protected from being moved for some "time". For this reason, + * the physical eraseblock is not directly moved from the @wl->free tree to the + * @wl->used tree. There is a protection queue in between where this + * physical eraseblock is temporarily stored (@wl->pq). + * + * All this protection stuff is needed because: + * o we don't want to move physical eraseblocks just after we have given them + * to the user; instead, we first want to let users fill them up with data; + * + * o there is a chance that the user will put the physical eraseblock very + * soon, so it makes sense not to move it for some time, but wait. + * + * Physical eraseblocks stay protected only for limited time. But the "time" is + * measured in erase cycles in this case. This is implemented with help of the + * protection queue. Eraseblocks are put to the tail of this queue when they + * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the + * head of the queue on each erase operation (for any eraseblock). So the + * length of the queue defines how may (global) erase cycles PEBs are protected. + * + * To put it differently, each physical eraseblock has 2 main states: free and + * used. The former state corresponds to the @wl->free tree. The latter state + * is split up on several sub-states: + * o the WL movement is allowed (@wl->used tree); + * o the WL movement is disallowed (@wl->erroneous) because the PEB is + * erroneous - e.g., there was a read error; + * o the WL movement is temporarily prohibited (@wl->pq queue); + * o scrubbing is needed (@wl->scrub tree). + * + * Depending on the sub-state, wear-leveling entries of the used physical + * eraseblocks may be kept in one of those structures. * * Note, in this implementation, we keep a small in-RAM object for each physical * eraseblock. This is surely not a scalable solution. But it appears to be good * enough for moderately large flashes and it is simple. In future, one may - * re-work this unit and make it more scalable. + * re-work this sub-system and make it more scalable. * - * At the moment this unit does not utilize the sequence number, which was - * introduced relatively recently. But it would be wise to do this because the - * sequence number of a logical eraseblock characterizes how old is it. For + * At the moment this sub-system does not utilize the sequence number, which + * was introduced relatively recently. But it would be wise to do this because + * the sequence number of a logical eraseblock characterizes how old is it. For * example, when we move a PEB with low erase counter, and we need to pick the * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we * pick target PEB with an average EC if our PEB is not very "old". This is a - * room for future re-works of the WL unit. - * - * FIXME: looks too complex, should be simplified (later). + * room for future re-works of the WL sub-system. */ -#ifdef UBI_LINUX +#define __UBOOT__ +#ifndef __UBOOT__ #include <linux/slab.h> #include <linux/crc32.h> #include <linux/freezer.h> #include <linux/kthread.h> +#else +#include <ubi_uboot.h> #endif -#include <ubi_uboot.h> #include "ubi.h" /* Number of physical eraseblocks reserved for wear-leveling purposes */ #define WL_RESERVED_PEBS 1 /* - * How many erase cycles are short term, unknown, and long term physical - * eraseblocks protected. - */ -#define ST_PROTECTION 16 -#define U_PROTECTION 10 -#define LT_PROTECTION 4 - -/* * Maximum difference between two erase counters. If this threshold is - * exceeded, the WL unit starts moving data from used physical eraseblocks with - * low erase counter to free physical eraseblocks with high erase counter. + * exceeded, the WL sub-system starts moving data from used physical + * eraseblocks with low erase counter to free physical eraseblocks with high + * erase counter. */ #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD /* - * When a physical eraseblock is moved, the WL unit has to pick the target + * When a physical eraseblock is moved, the WL sub-system has to pick the target * physical eraseblock to move to. The simplest way would be just to pick the * one with the highest erase counter. But in certain workloads this could lead * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a * situation when the picked physical eraseblock is constantly erased after the * data is written to it. So, we have a constant which limits the highest erase - * counter of the free physical eraseblock to pick. Namely, the WL unit does - * not pick eraseblocks with erase counter greater then the lowest erase + * counter of the free physical eraseblock to pick. Namely, the WL sub-system + * does not pick eraseblocks with erase counter greater than the lowest erase * counter plus %WL_FREE_MAX_DIFF. */ #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) @@ -108,89 +128,48 @@ */ #define WL_MAX_FAILURES 32 +static int self_check_ec(struct ubi_device *ubi, int pnum, int ec); +static int self_check_in_wl_tree(const struct ubi_device *ubi, + struct ubi_wl_entry *e, struct rb_root *root); +static int self_check_in_pq(const struct ubi_device *ubi, + struct ubi_wl_entry *e); + +#ifdef CONFIG_MTD_UBI_FASTMAP +#ifndef __UBOOT__ /** - * struct ubi_wl_prot_entry - PEB protection entry. - * @rb_pnum: link in the @wl->prot.pnum RB-tree - * @rb_aec: link in the @wl->prot.aec RB-tree - * @abs_ec: the absolute erase counter value when the protection ends - * @e: the wear-leveling entry of the physical eraseblock under protection - * - * When the WL unit returns a physical eraseblock, the physical eraseblock is - * protected from being moved for some "time". For this reason, the physical - * eraseblock is not directly moved from the @wl->free tree to the @wl->used - * tree. There is one more tree in between where this physical eraseblock is - * temporarily stored (@wl->prot). - * - * All this protection stuff is needed because: - * o we don't want to move physical eraseblocks just after we have given them - * to the user; instead, we first want to let users fill them up with data; - * - * o there is a chance that the user will put the physical eraseblock very - * soon, so it makes sense not to move it for some time, but wait; this is - * especially important in case of "short term" physical eraseblocks. - * - * Physical eraseblocks stay protected only for limited time. But the "time" is - * measured in erase cycles in this case. This is implemented with help of the - * absolute erase counter (@wl->abs_ec). When it reaches certain value, the - * physical eraseblocks are moved from the protection trees (@wl->prot.*) to - * the @wl->used tree. - * - * Protected physical eraseblocks are searched by physical eraseblock number - * (when they are put) and by the absolute erase counter (to check if it is - * time to move them to the @wl->used tree). So there are actually 2 RB-trees - * storing the protected physical eraseblocks: @wl->prot.pnum and - * @wl->prot.aec. They are referred to as the "protection" trees. The - * first one is indexed by the physical eraseblock number. The second one is - * indexed by the absolute erase counter. Both trees store - * &struct ubi_wl_prot_entry objects. - * - * Each physical eraseblock has 2 main states: free and used. The former state - * corresponds to the @wl->free tree. The latter state is split up on several - * sub-states: - * o the WL movement is allowed (@wl->used tree); - * o the WL movement is temporarily prohibited (@wl->prot.pnum and - * @wl->prot.aec trees); - * o scrubbing is needed (@wl->scrub tree). - * - * Depending on the sub-state, wear-leveling entries of the used physical - * eraseblocks may be kept in one of those trees. + * update_fastmap_work_fn - calls ubi_update_fastmap from a work queue + * @wrk: the work description object */ -struct ubi_wl_prot_entry { - struct rb_node rb_pnum; - struct rb_node rb_aec; - unsigned long long abs_ec; - struct ubi_wl_entry *e; -}; +static void update_fastmap_work_fn(struct work_struct *wrk) +{ + struct ubi_device *ubi = container_of(wrk, struct ubi_device, fm_work); + ubi_update_fastmap(ubi); +} +#endif /** - * struct ubi_work - UBI work description data structure. - * @list: a link in the list of pending works - * @func: worker function - * @priv: private data of the worker function - * - * @e: physical eraseblock to erase - * @torture: if the physical eraseblock has to be tortured - * - * The @func pointer points to the worker function. If the @cancel argument is - * not zero, the worker has to free the resources and exit immediately. The - * worker has to return zero in case of success and a negative error code in - * case of failure. + * ubi_ubi_is_fm_block - returns 1 if a PEB is currently used in a fastmap. + * @ubi: UBI device description object + * @pnum: the to be checked PEB */ -struct ubi_work { - struct list_head list; - int (*func)(struct ubi_device *ubi, struct ubi_work *wrk, int cancel); - /* The below fields are only relevant to erasure works */ - struct ubi_wl_entry *e; - int torture; -}; +static int ubi_is_fm_block(struct ubi_device *ubi, int pnum) +{ + int i; + + if (!ubi->fm) + return 0; + + for (i = 0; i < ubi->fm->used_blocks; i++) + if (ubi->fm->e[i]->pnum == pnum) + return 1; -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID -static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); -static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, - struct rb_root *root); + return 0; +} #else -#define paranoid_check_ec(ubi, pnum, ec) 0 -#define paranoid_check_in_wl_tree(e, root) +static int ubi_is_fm_block(struct ubi_device *ubi, int pnum) +{ + return 0; +} #endif /** @@ -210,7 +189,7 @@ static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) struct ubi_wl_entry *e1; parent = *p; - e1 = rb_entry(parent, struct ubi_wl_entry, rb); + e1 = rb_entry(parent, struct ubi_wl_entry, u.rb); if (e->ec < e1->ec) p = &(*p)->rb_left; @@ -225,8 +204,8 @@ static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) } } - rb_link_node(&e->rb, parent, p); - rb_insert_color(&e->rb, root); + rb_link_node(&e->u.rb, parent, p); + rb_insert_color(&e->u.rb, root); } /** @@ -289,18 +268,16 @@ static int produce_free_peb(struct ubi_device *ubi) { int err; - spin_lock(&ubi->wl_lock); while (!ubi->free.rb_node) { spin_unlock(&ubi->wl_lock); dbg_wl("do one work synchronously"); err = do_work(ubi); - if (err) - return err; spin_lock(&ubi->wl_lock); + if (err) + return err; } - spin_unlock(&ubi->wl_lock); return 0; } @@ -321,7 +298,7 @@ static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) while (p) { struct ubi_wl_entry *e1; - e1 = rb_entry(p, struct ubi_wl_entry, rb); + e1 = rb_entry(p, struct ubi_wl_entry, u.rb); if (e->pnum == e1->pnum) { ubi_assert(e == e1); @@ -345,223 +322,401 @@ static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) } /** - * prot_tree_add - add physical eraseblock to protection trees. + * prot_queue_add - add physical eraseblock to the protection queue. * @ubi: UBI device description object * @e: the physical eraseblock to add - * @pe: protection entry object to use - * @abs_ec: absolute erase counter value when this physical eraseblock has - * to be removed from the protection trees. * - * @wl->lock has to be locked. + * This function adds @e to the tail of the protection queue @ubi->pq, where + * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be + * temporarily protected from the wear-leveling worker. Note, @wl->lock has to + * be locked. */ -static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e, - struct ubi_wl_prot_entry *pe, int abs_ec) +static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) { - struct rb_node **p, *parent = NULL; - struct ubi_wl_prot_entry *pe1; - - pe->e = e; - pe->abs_ec = ubi->abs_ec + abs_ec; - - p = &ubi->prot.pnum.rb_node; - while (*p) { - parent = *p; - pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum); + int pq_tail = ubi->pq_head - 1; - if (e->pnum < pe1->e->pnum) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&pe->rb_pnum, parent, p); - rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum); - - p = &ubi->prot.aec.rb_node; - parent = NULL; - while (*p) { - parent = *p; - pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec); - - if (pe->abs_ec < pe1->abs_ec) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&pe->rb_aec, parent, p); - rb_insert_color(&pe->rb_aec, &ubi->prot.aec); + if (pq_tail < 0) + pq_tail = UBI_PROT_QUEUE_LEN - 1; + ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); + list_add_tail(&e->u.list, &ubi->pq[pq_tail]); + dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); } /** * find_wl_entry - find wear-leveling entry closest to certain erase counter. + * @ubi: UBI device description object * @root: the RB-tree where to look for - * @max: highest possible erase counter + * @diff: maximum possible difference from the smallest erase counter * * This function looks for a wear leveling entry with erase counter closest to - * @max and less then @max. + * min + @diff, where min is the smallest erase counter. */ -static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) +static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi, + struct rb_root *root, int diff) { struct rb_node *p; - struct ubi_wl_entry *e; + struct ubi_wl_entry *e, *prev_e = NULL; + int max; - e = rb_entry(rb_first(root), struct ubi_wl_entry, rb); - max += e->ec; + e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); + max = e->ec + diff; p = root->rb_node; while (p) { struct ubi_wl_entry *e1; - e1 = rb_entry(p, struct ubi_wl_entry, rb); + e1 = rb_entry(p, struct ubi_wl_entry, u.rb); if (e1->ec >= max) p = p->rb_left; else { p = p->rb_right; + prev_e = e; e = e1; } } + /* If no fastmap has been written and this WL entry can be used + * as anchor PEB, hold it back and return the second best WL entry + * such that fastmap can use the anchor PEB later. */ + if (prev_e && !ubi->fm_disabled && + !ubi->fm && e->pnum < UBI_FM_MAX_START) + return prev_e; + return e; } /** - * ubi_wl_get_peb - get a physical eraseblock. + * find_mean_wl_entry - find wear-leveling entry with medium erase counter. * @ubi: UBI device description object - * @dtype: type of data which will be stored in this physical eraseblock + * @root: the RB-tree where to look for * - * This function returns a physical eraseblock in case of success and a - * negative error code in case of failure. Might sleep. + * This function looks for a wear leveling entry with medium erase counter, + * but not greater or equivalent than the lowest erase counter plus + * %WL_FREE_MAX_DIFF/2. */ -int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) +static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi, + struct rb_root *root) { - int err, protect, medium_ec; struct ubi_wl_entry *e, *first, *last; - struct ubi_wl_prot_entry *pe; - ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || - dtype == UBI_UNKNOWN); + first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); + last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb); - pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); - if (!pe) - return -ENOMEM; + if (last->ec - first->ec < WL_FREE_MAX_DIFF) { + e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb); + +#ifdef CONFIG_MTD_UBI_FASTMAP + /* If no fastmap has been written and this WL entry can be used + * as anchor PEB, hold it back and return the second best + * WL entry such that fastmap can use the anchor PEB later. */ + if (e && !ubi->fm_disabled && !ubi->fm && + e->pnum < UBI_FM_MAX_START) + e = rb_entry(rb_next(root->rb_node), + struct ubi_wl_entry, u.rb); +#endif + } else + e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2); + + return e; +} + +#ifdef CONFIG_MTD_UBI_FASTMAP +/** + * find_anchor_wl_entry - find wear-leveling entry to used as anchor PEB. + * @root: the RB-tree where to look for + */ +static struct ubi_wl_entry *find_anchor_wl_entry(struct rb_root *root) +{ + struct rb_node *p; + struct ubi_wl_entry *e, *victim = NULL; + int max_ec = UBI_MAX_ERASECOUNTER; + + ubi_rb_for_each_entry(p, e, root, u.rb) { + if (e->pnum < UBI_FM_MAX_START && e->ec < max_ec) { + victim = e; + max_ec = e->ec; + } + } + + return victim; +} + +static int anchor_pebs_avalible(struct rb_root *root) +{ + struct rb_node *p; + struct ubi_wl_entry *e; + + ubi_rb_for_each_entry(p, e, root, u.rb) + if (e->pnum < UBI_FM_MAX_START) + return 1; + + return 0; +} + +/** + * ubi_wl_get_fm_peb - find a physical erase block with a given maximal number. + * @ubi: UBI device description object + * @anchor: This PEB will be used as anchor PEB by fastmap + * + * The function returns a physical erase block with a given maximal number + * and removes it from the wl subsystem. + * Must be called with wl_lock held! + */ +struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor) +{ + struct ubi_wl_entry *e = NULL; + + if (!ubi->free.rb_node || (ubi->free_count - ubi->beb_rsvd_pebs < 1)) + goto out; + + if (anchor) + e = find_anchor_wl_entry(&ubi->free); + else + e = find_mean_wl_entry(ubi, &ubi->free); + + if (!e) + goto out; + + self_check_in_wl_tree(ubi, e, &ubi->free); + + /* remove it from the free list, + * the wl subsystem does no longer know this erase block */ + rb_erase(&e->u.rb, &ubi->free); + ubi->free_count--; +out: + return e; +} +#endif + +/** + * __wl_get_peb - get a physical eraseblock. + * @ubi: UBI device description object + * + * This function returns a physical eraseblock in case of success and a + * negative error code in case of failure. + */ +static int __wl_get_peb(struct ubi_device *ubi) +{ + int err; + struct ubi_wl_entry *e; retry: - spin_lock(&ubi->wl_lock); if (!ubi->free.rb_node) { if (ubi->works_count == 0) { - ubi_assert(list_empty(&ubi->works)); ubi_err("no free eraseblocks"); - spin_unlock(&ubi->wl_lock); - kfree(pe); + ubi_assert(list_empty(&ubi->works)); return -ENOSPC; } - spin_unlock(&ubi->wl_lock); err = produce_free_peb(ubi); - if (err < 0) { - kfree(pe); + if (err < 0) return err; - } goto retry; } - switch (dtype) { - case UBI_LONGTERM: - /* - * For long term data we pick a physical eraseblock - * with high erase counter. But the highest erase - * counter we can pick is bounded by the the lowest - * erase counter plus %WL_FREE_MAX_DIFF. - */ - e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - protect = LT_PROTECTION; - break; - case UBI_UNKNOWN: - /* - * For unknown data we pick a physical eraseblock with - * medium erase counter. But we by no means can pick a - * physical eraseblock with erase counter greater or - * equivalent than the lowest erase counter plus - * %WL_FREE_MAX_DIFF. - */ - first = rb_entry(rb_first(&ubi->free), - struct ubi_wl_entry, rb); - last = rb_entry(rb_last(&ubi->free), - struct ubi_wl_entry, rb); - - if (last->ec - first->ec < WL_FREE_MAX_DIFF) - e = rb_entry(ubi->free.rb_node, - struct ubi_wl_entry, rb); - else { - medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; - e = find_wl_entry(&ubi->free, medium_ec); - } - protect = U_PROTECTION; - break; - case UBI_SHORTTERM: - /* - * For short term data we pick a physical eraseblock - * with the lowest erase counter as we expect it will - * be erased soon. - */ - e = rb_entry(rb_first(&ubi->free), - struct ubi_wl_entry, rb); - protect = ST_PROTECTION; - break; - default: - protect = 0; - e = NULL; - BUG(); + e = find_mean_wl_entry(ubi, &ubi->free); + if (!e) { + ubi_err("no free eraseblocks"); + return -ENOSPC; } + self_check_in_wl_tree(ubi, e, &ubi->free); + /* - * Move the physical eraseblock to the protection trees where it will + * Move the physical eraseblock to the protection queue where it will * be protected from being moved for some time. */ - paranoid_check_in_wl_tree(e, &ubi->free); - rb_erase(&e->rb, &ubi->free); - prot_tree_add(ubi, e, pe, protect); + rb_erase(&e->u.rb, &ubi->free); + ubi->free_count--; + dbg_wl("PEB %d EC %d", e->pnum, e->ec); +#ifndef CONFIG_MTD_UBI_FASTMAP + /* We have to enqueue e only if fastmap is disabled, + * is fastmap enabled prot_queue_add() will be called by + * ubi_wl_get_peb() after removing e from the pool. */ + prot_queue_add(ubi, e); +#endif + return e->pnum; +} - dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect); - spin_unlock(&ubi->wl_lock); +#ifdef CONFIG_MTD_UBI_FASTMAP +/** + * return_unused_pool_pebs - returns unused PEB to the free tree. + * @ubi: UBI device description object + * @pool: fastmap pool description object + */ +static void return_unused_pool_pebs(struct ubi_device *ubi, + struct ubi_fm_pool *pool) +{ + int i; + struct ubi_wl_entry *e; - return e->pnum; + for (i = pool->used; i < pool->size; i++) { + e = ubi->lookuptbl[pool->pebs[i]]; + wl_tree_add(e, &ubi->free); + ubi->free_count++; + } } /** - * prot_tree_del - remove a physical eraseblock from the protection trees + * refill_wl_pool - refills all the fastmap pool used by the + * WL sub-system. * @ubi: UBI device description object - * @pnum: the physical eraseblock to remove + */ +static void refill_wl_pool(struct ubi_device *ubi) +{ + struct ubi_wl_entry *e; + struct ubi_fm_pool *pool = &ubi->fm_wl_pool; + + return_unused_pool_pebs(ubi, pool); + + for (pool->size = 0; pool->size < pool->max_size; pool->size++) { + if (!ubi->free.rb_node || + (ubi->free_count - ubi->beb_rsvd_pebs < 5)) + break; + + e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); + self_check_in_wl_tree(ubi, e, &ubi->free); + rb_erase(&e->u.rb, &ubi->free); + ubi->free_count--; + + pool->pebs[pool->size] = e->pnum; + } + pool->used = 0; +} + +/** + * refill_wl_user_pool - refills all the fastmap pool used by ubi_wl_get_peb. + * @ubi: UBI device description object + */ +static void refill_wl_user_pool(struct ubi_device *ubi) +{ + struct ubi_fm_pool *pool = &ubi->fm_pool; + + return_unused_pool_pebs(ubi, pool); + + for (pool->size = 0; pool->size < pool->max_size; pool->size++) { + pool->pebs[pool->size] = __wl_get_peb(ubi); + if (pool->pebs[pool->size] < 0) + break; + } + pool->used = 0; +} + +/** + * ubi_refill_pools - refills all fastmap PEB pools. + * @ubi: UBI device description object + */ +void ubi_refill_pools(struct ubi_device *ubi) +{ + spin_lock(&ubi->wl_lock); + refill_wl_pool(ubi); + refill_wl_user_pool(ubi); + spin_unlock(&ubi->wl_lock); +} + +/* ubi_wl_get_peb - works exaclty like __wl_get_peb but keeps track of + * the fastmap pool. + */ +int ubi_wl_get_peb(struct ubi_device *ubi) +{ + int ret; + struct ubi_fm_pool *pool = &ubi->fm_pool; + struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool; + + if (!pool->size || !wl_pool->size || pool->used == pool->size || + wl_pool->used == wl_pool->size) + ubi_update_fastmap(ubi); + + /* we got not a single free PEB */ + if (!pool->size) + ret = -ENOSPC; + else { + spin_lock(&ubi->wl_lock); + ret = pool->pebs[pool->used++]; + prot_queue_add(ubi, ubi->lookuptbl[ret]); + spin_unlock(&ubi->wl_lock); + } + + return ret; +} + +/* get_peb_for_wl - returns a PEB to be used internally by the WL sub-system. * - * This function returns PEB @pnum from the protection trees and returns zero - * in case of success and %-ENODEV if the PEB was not found in the protection - * trees. + * @ubi: UBI device description object */ -static int prot_tree_del(struct ubi_device *ubi, int pnum) +static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) { - struct rb_node *p; - struct ubi_wl_prot_entry *pe = NULL; + struct ubi_fm_pool *pool = &ubi->fm_wl_pool; + int pnum; + + if (pool->used == pool->size || !pool->size) { + /* We cannot update the fastmap here because this + * function is called in atomic context. + * Let's fail here and refill/update it as soon as possible. */ +#ifndef __UBOOT__ + schedule_work(&ubi->fm_work); +#else + /* In U-Boot we must call this directly */ + ubi_update_fastmap(ubi); +#endif + return NULL; + } else { + pnum = pool->pebs[pool->used++]; + return ubi->lookuptbl[pnum]; + } +} +#else +static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) +{ + struct ubi_wl_entry *e; - p = ubi->prot.pnum.rb_node; - while (p) { + e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); + self_check_in_wl_tree(ubi, e, &ubi->free); + rb_erase(&e->u.rb, &ubi->free); - pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); + return e; +} - if (pnum == pe->e->pnum) - goto found; +int ubi_wl_get_peb(struct ubi_device *ubi) +{ + int peb, err; - if (pnum < pe->e->pnum) - p = p->rb_left; - else - p = p->rb_right; + spin_lock(&ubi->wl_lock); + peb = __wl_get_peb(ubi); + spin_unlock(&ubi->wl_lock); + + err = ubi_self_check_all_ff(ubi, peb, ubi->vid_hdr_aloffset, + ubi->peb_size - ubi->vid_hdr_aloffset); + if (err) { + ubi_err("new PEB %d does not contain all 0xFF bytes", peb); + return err; } - return -ENODEV; + return peb; +} +#endif -found: - ubi_assert(pe->e->pnum == pnum); - rb_erase(&pe->rb_aec, &ubi->prot.aec); - rb_erase(&pe->rb_pnum, &ubi->prot.pnum); - kfree(pe); +/** + * prot_queue_del - remove a physical eraseblock from the protection queue. + * @ubi: UBI device description object + * @pnum: the physical eraseblock to remove + * + * This function deletes PEB @pnum from the protection queue and returns zero + * in case of success and %-ENODEV if the PEB was not found. + */ +static int prot_queue_del(struct ubi_device *ubi, int pnum) +{ + struct ubi_wl_entry *e; + + e = ubi->lookuptbl[pnum]; + if (!e) + return -ENODEV; + + if (self_check_in_pq(ubi, e)) + return -ENODEV; + + list_del(&e->u.list); + dbg_wl("deleted PEB %d from the protection queue", e->pnum); return 0; } @@ -574,7 +729,8 @@ found: * This function returns zero in case of success and a negative error code in * case of failure. */ -static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) +static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, + int torture) { int err; struct ubi_ec_hdr *ec_hdr; @@ -582,8 +738,8 @@ static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int tortur dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec); - err = paranoid_check_ec(ubi, e->pnum, e->ec); - if (err > 0) + err = self_check_ec(ubi, e->pnum, e->ec); + if (err) return -EINVAL; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -626,91 +782,124 @@ out_free: } /** - * check_protection_over - check if it is time to stop protecting some - * physical eraseblocks. + * serve_prot_queue - check if it is time to stop protecting PEBs. * @ubi: UBI device description object * - * This function is called after each erase operation, when the absolute erase - * counter is incremented, to check if some physical eraseblock have not to be - * protected any longer. These physical eraseblocks are moved from the - * protection trees to the used tree. + * This function is called after each erase operation and removes PEBs from the + * tail of the protection queue. These PEBs have been protected for long enough + * and should be moved to the used tree. */ -static void check_protection_over(struct ubi_device *ubi) +static void serve_prot_queue(struct ubi_device *ubi) { - struct ubi_wl_prot_entry *pe; + struct ubi_wl_entry *e, *tmp; + int count; /* * There may be several protected physical eraseblock to remove, * process them all. */ - while (1) { - spin_lock(&ubi->wl_lock); - if (!ubi->prot.aec.rb_node) { - spin_unlock(&ubi->wl_lock); - break; - } - - pe = rb_entry(rb_first(&ubi->prot.aec), - struct ubi_wl_prot_entry, rb_aec); +repeat: + count = 0; + spin_lock(&ubi->wl_lock); + list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) { + dbg_wl("PEB %d EC %d protection over, move to used tree", + e->pnum, e->ec); - if (pe->abs_ec > ubi->abs_ec) { + list_del(&e->u.list); + wl_tree_add(e, &ubi->used); + if (count++ > 32) { + /* + * Let's be nice and avoid holding the spinlock for + * too long. + */ spin_unlock(&ubi->wl_lock); - break; + cond_resched(); + goto repeat; } - - dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu", - pe->e->pnum, ubi->abs_ec, pe->abs_ec); - rb_erase(&pe->rb_aec, &ubi->prot.aec); - rb_erase(&pe->rb_pnum, &ubi->prot.pnum); - wl_tree_add(pe->e, &ubi->used); - spin_unlock(&ubi->wl_lock); - - kfree(pe); - cond_resched(); } + + ubi->pq_head += 1; + if (ubi->pq_head == UBI_PROT_QUEUE_LEN) + ubi->pq_head = 0; + ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN); + spin_unlock(&ubi->wl_lock); } /** - * schedule_ubi_work - schedule a work. + * __schedule_ubi_work - schedule a work. * @ubi: UBI device description object * @wrk: the work to schedule * - * This function enqueues a work defined by @wrk to the tail of the pending - * works list. + * This function adds a work defined by @wrk to the tail of the pending works + * list. Can only be used of ubi->work_sem is already held in read mode! */ -static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) +static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) { spin_lock(&ubi->wl_lock); list_add_tail(&wrk->list, &ubi->works); ubi_assert(ubi->works_count >= 0); ubi->works_count += 1; - +#ifndef __UBOOT__ + if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi)) + wake_up_process(ubi->bgt_thread); +#else /* * U-Boot special: We have no bgt_thread in U-Boot! * So just call do_work() here directly. */ do_work(ubi); - +#endif spin_unlock(&ubi->wl_lock); } +/** + * schedule_ubi_work - schedule a work. + * @ubi: UBI device description object + * @wrk: the work to schedule + * + * This function adds a work defined by @wrk to the tail of the pending works + * list. + */ +static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) +{ + down_read(&ubi->work_sem); + __schedule_ubi_work(ubi, wrk); + up_read(&ubi->work_sem); +} + static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, int cancel); +#ifdef CONFIG_MTD_UBI_FASTMAP +/** + * ubi_is_erase_work - checks whether a work is erase work. + * @wrk: The work object to be checked + */ +int ubi_is_erase_work(struct ubi_work *wrk) +{ + return wrk->func == erase_worker; +} +#endif + /** * schedule_erase - schedule an erase work. * @ubi: UBI device description object * @e: the WL entry of the physical eraseblock to erase + * @vol_id: the volume ID that last used this PEB + * @lnum: the last used logical eraseblock number for the PEB * @torture: if the physical eraseblock has to be tortured * * This function returns zero in case of success and a %-ENOMEM in case of * failure. */ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, - int torture) + int vol_id, int lnum, int torture) { struct ubi_work *wl_wrk; + ubi_assert(e); + ubi_assert(!ubi_is_fm_block(ubi, e->pnum)); + dbg_wl("schedule erasure of PEB %d, EC %d, torture %d", e->pnum, e->ec, torture); @@ -720,6 +909,8 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, wl_wrk->func = &erase_worker; wl_wrk->e = e; + wl_wrk->vol_id = vol_id; + wl_wrk->lnum = lnum; wl_wrk->torture = torture; schedule_ubi_work(ubi, wl_wrk); @@ -727,6 +918,79 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, } /** + * do_sync_erase - run the erase worker synchronously. + * @ubi: UBI device description object + * @e: the WL entry of the physical eraseblock to erase + * @vol_id: the volume ID that last used this PEB + * @lnum: the last used logical eraseblock number for the PEB + * @torture: if the physical eraseblock has to be tortured + * + */ +static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, + int vol_id, int lnum, int torture) +{ + struct ubi_work *wl_wrk; + + dbg_wl("sync erase of PEB %i", e->pnum); + + wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); + if (!wl_wrk) + return -ENOMEM; + + wl_wrk->e = e; + wl_wrk->vol_id = vol_id; + wl_wrk->lnum = lnum; + wl_wrk->torture = torture; + + return erase_worker(ubi, wl_wrk, 0); +} + +#ifdef CONFIG_MTD_UBI_FASTMAP +/** + * ubi_wl_put_fm_peb - returns a PEB used in a fastmap to the wear-leveling + * sub-system. + * see: ubi_wl_put_peb() + * + * @ubi: UBI device description object + * @fm_e: physical eraseblock to return + * @lnum: the last used logical eraseblock number for the PEB + * @torture: if this physical eraseblock has to be tortured + */ +int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e, + int lnum, int torture) +{ + struct ubi_wl_entry *e; + int vol_id, pnum = fm_e->pnum; + + dbg_wl("PEB %d", pnum); + + ubi_assert(pnum >= 0); + ubi_assert(pnum < ubi->peb_count); + + spin_lock(&ubi->wl_lock); + e = ubi->lookuptbl[pnum]; + + /* This can happen if we recovered from a fastmap the very + * first time and writing now a new one. In this case the wl system + * has never seen any PEB used by the original fastmap. + */ + if (!e) { + e = fm_e; + ubi_assert(e->ec >= 0); + ubi->lookuptbl[pnum] = e; + } else { + e->ec = fm_e->ec; + kfree(fm_e); + } + + spin_unlock(&ubi->wl_lock); + + vol_id = lnum ? UBI_FM_DATA_VOLUME_ID : UBI_FM_SB_VOLUME_ID; + return schedule_erase(ubi, e, vol_id, lnum, torture); +} +#endif + +/** * wear_leveling_worker - wear-leveling worker function. * @ubi: UBI device description object * @wrk: the work object @@ -739,13 +1003,15 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, int cancel) { - int err, put = 0, scrubbing = 0, protect = 0; - struct ubi_wl_prot_entry *uninitialized_var(pe); + int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0; + int vol_id = -1, uninitialized_var(lnum); +#ifdef CONFIG_MTD_UBI_FASTMAP + int anchor = wrk->anchor; +#endif struct ubi_wl_entry *e1, *e2; struct ubi_vid_hdr *vid_hdr; kfree(wrk); - if (cancel) return 0; @@ -775,36 +1041,61 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, goto out_cancel; } +#ifdef CONFIG_MTD_UBI_FASTMAP + /* Check whether we need to produce an anchor PEB */ + if (!anchor) + anchor = !anchor_pebs_avalible(&ubi->free); + + if (anchor) { + e1 = find_anchor_wl_entry(&ubi->used); + if (!e1) + goto out_cancel; + e2 = get_peb_for_wl(ubi); + if (!e2) + goto out_cancel; + + self_check_in_wl_tree(ubi, e1, &ubi->used); + rb_erase(&e1->u.rb, &ubi->used); + dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum); + } else if (!ubi->scrub.rb_node) { +#else if (!ubi->scrub.rb_node) { +#endif /* * Now pick the least worn-out used physical eraseblock and a * highly worn-out free physical eraseblock. If the erase * counters differ much enough, start wear-leveling. */ - e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); - e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); + e2 = get_peb_for_wl(ubi); + if (!e2) + goto out_cancel; if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { dbg_wl("no WL needed: min used EC %d, max free EC %d", e1->ec, e2->ec); + + /* Give the unused PEB back */ + wl_tree_add(e2, &ubi->free); goto out_cancel; } - paranoid_check_in_wl_tree(e1, &ubi->used); - rb_erase(&e1->rb, &ubi->used); + self_check_in_wl_tree(ubi, e1, &ubi->used); + rb_erase(&e1->u.rb, &ubi->used); dbg_wl("move PEB %d EC %d to PEB %d EC %d", e1->pnum, e1->ec, e2->pnum, e2->ec); } else { /* Perform scrubbing */ scrubbing = 1; - e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); - e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - paranoid_check_in_wl_tree(e1, &ubi->scrub); - rb_erase(&e1->rb, &ubi->scrub); + e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); + e2 = get_peb_for_wl(ubi); + if (!e2) + goto out_cancel; + + self_check_in_wl_tree(ubi, e1, &ubi->scrub); + rb_erase(&e1->u.rb, &ubi->scrub); dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); } - paranoid_check_in_wl_tree(e2, &ubi->free); - rb_erase(&e2->rb, &ubi->free); ubi->move_from = e1; ubi->move_to = e2; spin_unlock(&ubi->wl_lock); @@ -822,81 +1113,127 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); if (err && err != UBI_IO_BITFLIPS) { - if (err == UBI_IO_PEB_FREE) { + if (err == UBI_IO_FF) { /* * We are trying to move PEB without a VID header. UBI * always write VID headers shortly after the PEB was - * given, so we have a situation when it did not have - * chance to write it down because it was preempted. - * Just re-schedule the work, so that next time it will - * likely have the VID header in place. + * given, so we have a situation when it has not yet + * had a chance to write it, because it was preempted. + * So add this PEB to the protection queue so far, + * because presumably more data will be written there + * (including the missing VID header), and then we'll + * move it. */ dbg_wl("PEB %d has no VID header", e1->pnum); + protect = 1; + goto out_not_moved; + } else if (err == UBI_IO_FF_BITFLIPS) { + /* + * The same situation as %UBI_IO_FF, but bit-flips were + * detected. It is better to schedule this PEB for + * scrubbing. + */ + dbg_wl("PEB %d has no VID header but has bit-flips", + e1->pnum); + scrubbing = 1; goto out_not_moved; } ubi_err("error %d while reading VID header from PEB %d", err, e1->pnum); - if (err > 0) - err = -EIO; goto out_error; } + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); + err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); if (err) { - - if (err < 0) - goto out_error; - if (err == 1) + if (err == MOVE_CANCEL_RACE) { + /* + * The LEB has not been moved because the volume is + * being deleted or the PEB has been put meanwhile. We + * should prevent this PEB from being selected for + * wear-leveling movement again, so put it to the + * protection queue. + */ + protect = 1; + goto out_not_moved; + } + if (err == MOVE_RETRY) { + scrubbing = 1; goto out_not_moved; + } + if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || + err == MOVE_TARGET_RD_ERR) { + /* + * Target PEB had bit-flips or write error - torture it. + */ + torture = 1; + goto out_not_moved; + } - /* - * For some reason the LEB was not moved - it might be because - * the volume is being deleted. We should prevent this PEB from - * being selected for wear-levelling movement for some "time", - * so put it to the protection tree. - */ + if (err == MOVE_SOURCE_RD_ERR) { + /* + * An error happened while reading the source PEB. Do + * not switch to R/O mode in this case, and give the + * upper layers a possibility to recover from this, + * e.g. by unmapping corresponding LEB. Instead, just + * put this PEB to the @ubi->erroneous list to prevent + * UBI from trying to move it over and over again. + */ + if (ubi->erroneous_peb_count > ubi->max_erroneous) { + ubi_err("too many erroneous eraseblocks (%d)", + ubi->erroneous_peb_count); + goto out_error; + } + erroneous = 1; + goto out_not_moved; + } - dbg_wl("cancelled moving PEB %d", e1->pnum); - pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); - if (!pe) { - err = -ENOMEM; + if (err < 0) goto out_error; - } - protect = 1; + ubi_assert(0); } + /* The PEB has been successfully moved */ + if (scrubbing) + ubi_msg("scrubbed PEB %d (LEB %d:%d), data moved to PEB %d", + e1->pnum, vol_id, lnum, e2->pnum); ubi_free_vid_hdr(ubi, vid_hdr); + spin_lock(&ubi->wl_lock); - if (protect) - prot_tree_add(ubi, e1, pe, protect); - if (!ubi->move_to_put) + if (!ubi->move_to_put) { wl_tree_add(e2, &ubi->used); - else - put = 1; + e2 = NULL; + } ubi->move_from = ubi->move_to = NULL; ubi->move_to_put = ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); - if (put) { + err = do_sync_erase(ubi, e1, vol_id, lnum, 0); + if (err) { + kmem_cache_free(ubi_wl_entry_slab, e1); + if (e2) + kmem_cache_free(ubi_wl_entry_slab, e2); + goto out_ro; + } + + if (e2) { /* * Well, the target PEB was put meanwhile, schedule it for * erasure. */ - dbg_wl("PEB %d was put meanwhile, erase", e2->pnum); - err = schedule_erase(ubi, e2, 0); - if (err) - goto out_error; - } - - if (!protect) { - err = schedule_erase(ubi, e1, 0); - if (err) - goto out_error; + dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase", + e2->pnum, vol_id, lnum); + err = do_sync_erase(ubi, e2, vol_id, lnum, 0); + if (err) { + kmem_cache_free(ubi_wl_entry_slab, e2); + goto out_ro; + } } - dbg_wl("done"); mutex_unlock(&ubi->move_mutex); return 0; @@ -904,42 +1241,60 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, /* * For some reasons the LEB was not moved, might be an error, might be * something else. @e1 was not changed, so return it back. @e2 might - * be changed, schedule it for erasure. + * have been changed, schedule it for erasure. */ out_not_moved: - ubi_free_vid_hdr(ubi, vid_hdr); + if (vol_id != -1) + dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)", + e1->pnum, vol_id, lnum, e2->pnum, err); + else + dbg_wl("cancel moving PEB %d to PEB %d (%d)", + e1->pnum, e2->pnum, err); spin_lock(&ubi->wl_lock); - if (scrubbing) + if (protect) + prot_queue_add(ubi, e1); + else if (erroneous) { + wl_tree_add(e1, &ubi->erroneous); + ubi->erroneous_peb_count += 1; + } else if (scrubbing) wl_tree_add(e1, &ubi->scrub); else wl_tree_add(e1, &ubi->used); + ubi_assert(!ubi->move_to_put); ubi->move_from = ubi->move_to = NULL; - ubi->move_to_put = ubi->wl_scheduled = 0; + ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); - err = schedule_erase(ubi, e2, 0); - if (err) - goto out_error; - + ubi_free_vid_hdr(ubi, vid_hdr); + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); + if (err) { + kmem_cache_free(ubi_wl_entry_slab, e2); + goto out_ro; + } mutex_unlock(&ubi->move_mutex); return 0; out_error: - ubi_err("error %d while moving PEB %d to PEB %d", - err, e1->pnum, e2->pnum); - - ubi_free_vid_hdr(ubi, vid_hdr); + if (vol_id != -1) + ubi_err("error %d while moving PEB %d to PEB %d", + err, e1->pnum, e2->pnum); + else + ubi_err("error %d while moving PEB %d (LEB %d:%d) to PEB %d", + err, e1->pnum, vol_id, lnum, e2->pnum); spin_lock(&ubi->wl_lock); ubi->move_from = ubi->move_to = NULL; ubi->move_to_put = ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); + ubi_free_vid_hdr(ubi, vid_hdr); kmem_cache_free(ubi_wl_entry_slab, e1); kmem_cache_free(ubi_wl_entry_slab, e2); - ubi_ro_mode(ubi); +out_ro: + ubi_ro_mode(ubi); mutex_unlock(&ubi->move_mutex); - return err; + ubi_assert(err != 0); + return err < 0 ? err : -EIO; out_cancel: ubi->wl_scheduled = 0; @@ -952,12 +1307,13 @@ out_cancel: /** * ensure_wear_leveling - schedule wear-leveling if it is needed. * @ubi: UBI device description object + * @nested: set to non-zero if this function is called from UBI worker * * This function checks if it is time to start wear-leveling and schedules it * if yes. This function returns zero in case of success and a negative error * code in case of failure. */ -static int ensure_wear_leveling(struct ubi_device *ubi) +static int ensure_wear_leveling(struct ubi_device *ubi, int nested) { int err = 0; struct ubi_wl_entry *e1; @@ -981,11 +1337,11 @@ static int ensure_wear_leveling(struct ubi_device *ubi) /* * We schedule wear-leveling only if the difference between the * lowest erase counter of used physical eraseblocks and a high - * erase counter of free physical eraseblocks is greater then + * erase counter of free physical eraseblocks is greater than * %UBI_WL_THRESHOLD. */ - e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); - e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); + e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) goto out_unlock; @@ -1002,8 +1358,12 @@ static int ensure_wear_leveling(struct ubi_device *ubi) goto out_cancel; } + wrk->anchor = 0; wrk->func = &wear_leveling_worker; - schedule_ubi_work(ubi, wrk); + if (nested) + __schedule_ubi_work(ubi, wrk); + else + schedule_ubi_work(ubi, wrk); return err; out_cancel: @@ -1014,6 +1374,38 @@ out_unlock: return err; } +#ifdef CONFIG_MTD_UBI_FASTMAP +/** + * ubi_ensure_anchor_pebs - schedule wear-leveling to produce an anchor PEB. + * @ubi: UBI device description object + */ +int ubi_ensure_anchor_pebs(struct ubi_device *ubi) +{ + struct ubi_work *wrk; + + spin_lock(&ubi->wl_lock); + if (ubi->wl_scheduled) { + spin_unlock(&ubi->wl_lock); + return 0; + } + ubi->wl_scheduled = 1; + spin_unlock(&ubi->wl_lock); + + wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); + if (!wrk) { + spin_lock(&ubi->wl_lock); + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + return -ENOMEM; + } + + wrk->anchor = 1; + wrk->func = &wear_leveling_worker; + schedule_ubi_work(ubi, wrk); + return 0; +} +#endif + /** * erase_worker - physical eraseblock erase worker function. * @ubi: UBI device description object @@ -1029,7 +1421,10 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, int cancel) { struct ubi_wl_entry *e = wl_wrk->e; - int pnum = e->pnum, err, need; + int pnum = e->pnum; + int vol_id = wl_wrk->vol_id; + int lnum = wl_wrk->lnum; + int err, available_consumed = 0; if (cancel) { dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); @@ -1038,7 +1433,10 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, return 0; } - dbg_wl("erase PEB %d EC %d", pnum, e->ec); + dbg_wl("erase PEB %d EC %d LEB %d:%d", + pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum); + + ubi_assert(!ubi_is_fm_block(ubi, e->pnum)); err = sync_erase(ubi, e, wl_wrk->torture); if (!err) { @@ -1046,44 +1444,45 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, kfree(wl_wrk); spin_lock(&ubi->wl_lock); - ubi->abs_ec += 1; wl_tree_add(e, &ubi->free); + ubi->free_count++; spin_unlock(&ubi->wl_lock); /* - * One more erase operation has happened, take care about protected - * physical eraseblocks. + * One more erase operation has happened, take care about + * protected physical eraseblocks. */ - check_protection_over(ubi); + serve_prot_queue(ubi); /* And take care about wear-leveling */ - err = ensure_wear_leveling(ubi); + err = ensure_wear_leveling(ubi, 1); return err; } ubi_err("failed to erase PEB %d, error %d", pnum, err); kfree(wl_wrk); - kmem_cache_free(ubi_wl_entry_slab, e); if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || err == -EBUSY) { int err1; /* Re-schedule the LEB for erasure */ - err1 = schedule_erase(ubi, e, 0); + err1 = schedule_erase(ubi, e, vol_id, lnum, 0); if (err1) { err = err1; goto out_ro; } return err; - } else if (err != -EIO) { + } + + kmem_cache_free(ubi_wl_entry_slab, e); + if (err != -EIO) /* * If this is not %-EIO, we have no idea what to do. Scheduling * this physical eraseblock for erasure again would cause - * errors again and again. Well, lets switch to RO mode. + * errors again and again. Well, lets switch to R/O mode. */ goto out_ro; - } /* It is %-EIO, the PEB went bad */ @@ -1093,48 +1492,62 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, } spin_lock(&ubi->volumes_lock); - need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1; - if (need > 0) { - need = ubi->avail_pebs >= need ? need : ubi->avail_pebs; - ubi->avail_pebs -= need; - ubi->rsvd_pebs += need; - ubi->beb_rsvd_pebs += need; - if (need > 0) - ubi_msg("reserve more %d PEBs", need); - } - if (ubi->beb_rsvd_pebs == 0) { - spin_unlock(&ubi->volumes_lock); - ubi_err("no reserved physical eraseblocks"); - goto out_ro; + if (ubi->avail_pebs == 0) { + spin_unlock(&ubi->volumes_lock); + ubi_err("no reserved/available physical eraseblocks"); + goto out_ro; + } + ubi->avail_pebs -= 1; + available_consumed = 1; } - spin_unlock(&ubi->volumes_lock); - ubi_msg("mark PEB %d as bad", pnum); + ubi_msg("mark PEB %d as bad", pnum); err = ubi_io_mark_bad(ubi, pnum); if (err) goto out_ro; spin_lock(&ubi->volumes_lock); - ubi->beb_rsvd_pebs -= 1; + if (ubi->beb_rsvd_pebs > 0) { + if (available_consumed) { + /* + * The amount of reserved PEBs increased since we last + * checked. + */ + ubi->avail_pebs += 1; + available_consumed = 0; + } + ubi->beb_rsvd_pebs -= 1; + } ubi->bad_peb_count += 1; ubi->good_peb_count -= 1; ubi_calculate_reserved(ubi); - if (ubi->beb_rsvd_pebs == 0) - ubi_warn("last PEB from the reserved pool was used"); + if (available_consumed) + ubi_warn("no PEBs in the reserved pool, used an available PEB"); + else if (ubi->beb_rsvd_pebs) + ubi_msg("%d PEBs left in the reserve", ubi->beb_rsvd_pebs); + else + ubi_warn("last PEB from the reserve was used"); spin_unlock(&ubi->volumes_lock); return err; out_ro: + if (available_consumed) { + spin_lock(&ubi->volumes_lock); + ubi->avail_pebs += 1; + spin_unlock(&ubi->volumes_lock); + } ubi_ro_mode(ubi); return err; } /** - * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit. + * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system. * @ubi: UBI device description object + * @vol_id: the volume ID that last used this PEB + * @lnum: the last used logical eraseblock number for the PEB * @pnum: physical eraseblock to return * @torture: if this physical eraseblock has to be tortured * @@ -1143,7 +1556,8 @@ out_ro: * occurred to this @pnum and it has to be tested. This function returns zero * in case of success, and a negative error code in case of failure. */ -int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) +int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum, + int pnum, int torture) { int err; struct ubi_wl_entry *e; @@ -1172,11 +1586,11 @@ retry: /* * User is putting the physical eraseblock which was selected * as the target the data is moved to. It may happen if the EBA - * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but - * the WL unit has not put the PEB to the "used" tree yet, but - * it is about to do this. So we just set a flag which will - * tell the WL worker that the PEB is not needed anymore and - * should be scheduled for erasure. + * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()' + * but the WL sub-system has not put the PEB to the "used" tree + * yet, but it is about to do this. So we just set a flag which + * will tell the WL worker that the PEB is not needed anymore + * and should be scheduled for erasure. */ dbg_wl("PEB %d is the target of data moving", pnum); ubi_assert(!ubi->move_to_put); @@ -1185,13 +1599,20 @@ retry: return 0; } else { if (in_wl_tree(e, &ubi->used)) { - paranoid_check_in_wl_tree(e, &ubi->used); - rb_erase(&e->rb, &ubi->used); + self_check_in_wl_tree(ubi, e, &ubi->used); + rb_erase(&e->u.rb, &ubi->used); } else if (in_wl_tree(e, &ubi->scrub)) { - paranoid_check_in_wl_tree(e, &ubi->scrub); - rb_erase(&e->rb, &ubi->scrub); + self_check_in_wl_tree(ubi, e, &ubi->scrub); + rb_erase(&e->u.rb, &ubi->scrub); + } else if (in_wl_tree(e, &ubi->erroneous)) { + self_check_in_wl_tree(ubi, e, &ubi->erroneous); + rb_erase(&e->u.rb, &ubi->erroneous); + ubi->erroneous_peb_count -= 1; + ubi_assert(ubi->erroneous_peb_count >= 0); + /* Erroneous PEBs should be tortured */ + torture = 1; } else { - err = prot_tree_del(ubi, e->pnum); + err = prot_queue_del(ubi, e->pnum); if (err) { ubi_err("PEB %d not found", pnum); ubi_ro_mode(ubi); @@ -1202,7 +1623,7 @@ retry: } spin_unlock(&ubi->wl_lock); - err = schedule_erase(ubi, e, torture); + err = schedule_erase(ubi, e, vol_id, lnum, torture); if (err) { spin_lock(&ubi->wl_lock); wl_tree_add(e, &ubi->used); @@ -1231,7 +1652,8 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) retry: spin_lock(&ubi->wl_lock); e = ubi->lookuptbl[pnum]; - if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) { + if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) || + in_wl_tree(e, &ubi->erroneous)) { spin_unlock(&ubi->wl_lock); return 0; } @@ -1250,12 +1672,12 @@ retry: } if (in_wl_tree(e, &ubi->used)) { - paranoid_check_in_wl_tree(e, &ubi->used); - rb_erase(&e->rb, &ubi->used); + self_check_in_wl_tree(ubi, e, &ubi->used); + rb_erase(&e->u.rb, &ubi->used); } else { int err; - err = prot_tree_del(ubi, e->pnum); + err = prot_queue_del(ubi, e->pnum); if (err) { ubi_err("PEB %d not found", pnum); ubi_ro_mode(ubi); @@ -1271,29 +1693,60 @@ retry: * Technically scrubbing is the same as wear-leveling, so it is done * by the WL worker. */ - return ensure_wear_leveling(ubi); + return ensure_wear_leveling(ubi, 0); } /** * ubi_wl_flush - flush all pending works. * @ubi: UBI device description object + * @vol_id: the volume id to flush for + * @lnum: the logical eraseblock number to flush for * - * This function returns zero in case of success and a negative error code in - * case of failure. + * This function executes all pending works for a particular volume id / + * logical eraseblock number pair. If either value is set to %UBI_ALL, then it + * acts as a wildcard for all of the corresponding volume numbers or logical + * eraseblock numbers. It returns zero in case of success and a negative error + * code in case of failure. */ -int ubi_wl_flush(struct ubi_device *ubi) +int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) { - int err; + int err = 0; + int found = 1; /* - * Erase while the pending works queue is not empty, but not more then + * Erase while the pending works queue is not empty, but not more than * the number of currently pending works. */ - dbg_wl("flush (%d pending works)", ubi->works_count); - while (ubi->works_count) { - err = do_work(ubi); - if (err) - return err; + dbg_wl("flush pending work for LEB %d:%d (%d pending works)", + vol_id, lnum, ubi->works_count); + + while (found) { + struct ubi_work *wrk; + found = 0; + + down_read(&ubi->work_sem); + spin_lock(&ubi->wl_lock); + list_for_each_entry(wrk, &ubi->works, list) { + if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) && + (lnum == UBI_ALL || wrk->lnum == lnum)) { + list_del(&wrk->list); + ubi->works_count -= 1; + ubi_assert(ubi->works_count >= 0); + spin_unlock(&ubi->wl_lock); + + err = wrk->func(ubi, wrk, 0); + if (err) { + up_read(&ubi->work_sem); + return err; + } + + spin_lock(&ubi->wl_lock); + found = 1; + break; + } + } + spin_unlock(&ubi->wl_lock); + up_read(&ubi->work_sem); } /* @@ -1303,18 +1756,7 @@ int ubi_wl_flush(struct ubi_device *ubi) down_write(&ubi->work_sem); up_write(&ubi->work_sem); - /* - * And in case last was the WL worker and it cancelled the LEB - * movement, flush again. - */ - while (ubi->works_count) { - dbg_wl("flush more (%d pending works)", ubi->works_count); - err = do_work(ubi); - if (err) - return err; - } - - return 0; + return err; } /** @@ -1333,11 +1775,11 @@ static void tree_destroy(struct rb_root *root) else if (rb->rb_right) rb = rb->rb_right; else { - e = rb_entry(rb, struct ubi_wl_entry, rb); + e = rb_entry(rb, struct ubi_wl_entry, u.rb); rb = rb_parent(rb); if (rb) { - if (rb->rb_left == &e->rb) + if (rb->rb_left == &e->u.rb) rb->rb_left = NULL; else rb->rb_right = NULL; @@ -1372,7 +1814,7 @@ int ubi_thread(void *u) spin_lock(&ubi->wl_lock); if (list_empty(&ubi->works) || ubi->ro_mode || - !ubi->thread_enabled) { + !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&ubi->wl_lock); schedule(); @@ -1392,7 +1834,8 @@ int ubi_thread(void *u) ubi_msg("%s: %d consecutive failures", ubi->bgt_name, WL_MAX_FAILURES); ubi_ro_mode(ubi); - break; + ubi->thread_enabled = 0; + continue; } } else failures = 0; @@ -1422,30 +1865,32 @@ static void cancel_pending(struct ubi_device *ubi) } /** - * ubi_wl_init_scan - initialize the wear-leveling unit using scanning - * information. + * ubi_wl_init - initialize the WL sub-system using attaching information. * @ubi: UBI device description object - * @si: scanning information + * @ai: attaching information * * This function returns zero in case of success, and a negative error code in * case of failure. */ -int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) +int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) { - int err; + int err, i, reserved_pebs, found_pebs = 0; struct rb_node *rb1, *rb2; - struct ubi_scan_volume *sv; - struct ubi_scan_leb *seb, *tmp; + struct ubi_ainf_volume *av; + struct ubi_ainf_peb *aeb, *tmp; struct ubi_wl_entry *e; - - ubi->used = ubi->free = ubi->scrub = RB_ROOT; - ubi->prot.pnum = ubi->prot.aec = RB_ROOT; + ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT; spin_lock_init(&ubi->wl_lock); mutex_init(&ubi->move_mutex); init_rwsem(&ubi->work_sem); - ubi->max_ec = si->max_ec; + ubi->max_ec = ai->max_ec; INIT_LIST_HEAD(&ubi->works); +#ifndef __UBOOT__ +#ifdef CONFIG_MTD_UBI_FASTMAP + INIT_WORK(&ubi->fm_work, update_fastmap_work_fn); +#endif +#endif sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); @@ -1454,64 +1899,63 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) if (!ubi->lookuptbl) return err; - list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { + for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) + INIT_LIST_HEAD(&ubi->pq[i]); + ubi->pq_head = 0; + + list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { cond_resched(); e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) goto out_free; - e->pnum = seb->pnum; - e->ec = seb->ec; + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi_assert(!ubi_is_fm_block(ubi, e->pnum)); ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, 0)) { + if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { kmem_cache_free(ubi_wl_entry_slab, e); goto out_free; } + + found_pebs++; } - list_for_each_entry(seb, &si->free, u.list) { + ubi->free_count = 0; + list_for_each_entry(aeb, &ai->free, u.list) { cond_resched(); e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) goto out_free; - e->pnum = seb->pnum; - e->ec = seb->ec; + e->pnum = aeb->pnum; + e->ec = aeb->ec; ubi_assert(e->ec >= 0); - wl_tree_add(e, &ubi->free); - ubi->lookuptbl[e->pnum] = e; - } - - list_for_each_entry(seb, &si->corr, u.list) { - cond_resched(); + ubi_assert(!ubi_is_fm_block(ubi, e->pnum)); - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; + wl_tree_add(e, &ubi->free); + ubi->free_count++; - e->pnum = seb->pnum; - e->ec = seb->ec; ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, 0)) { - kmem_cache_free(ubi_wl_entry_slab, e); - goto out_free; - } + + found_pebs++; } - ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { - ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { + ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { cond_resched(); e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) goto out_free; - e->pnum = seb->pnum; - e->ec = seb->ec; + e->pnum = aeb->pnum; + e->ec = aeb->ec; ubi->lookuptbl[e->pnum] = e; - if (!seb->scrub) { + + if (!aeb->scrub) { dbg_wl("add PEB %d EC %d to the used tree", e->pnum, e->ec); wl_tree_add(e, &ubi->used); @@ -1520,20 +1964,38 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) e->pnum, e->ec); wl_tree_add(e, &ubi->scrub); } + + found_pebs++; } } - if (ubi->avail_pebs < WL_RESERVED_PEBS) { + dbg_wl("found %i PEBs", found_pebs); + + if (ubi->fm) + ubi_assert(ubi->good_peb_count == \ + found_pebs + ubi->fm->used_blocks); + else + ubi_assert(ubi->good_peb_count == found_pebs); + + reserved_pebs = WL_RESERVED_PEBS; +#ifdef CONFIG_MTD_UBI_FASTMAP + /* Reserve enough LEBs to store two fastmaps. */ + reserved_pebs += (ubi->fm_size / ubi->leb_size) * 2; +#endif + + if (ubi->avail_pebs < reserved_pebs) { ubi_err("no enough physical eraseblocks (%d, need %d)", - ubi->avail_pebs, WL_RESERVED_PEBS); - err = -ENOSPC; + ubi->avail_pebs, reserved_pebs); + if (ubi->corr_peb_count) + ubi_err("%d PEBs are corrupted and not used", + ubi->corr_peb_count); goto out_free; } - ubi->avail_pebs -= WL_RESERVED_PEBS; - ubi->rsvd_pebs += WL_RESERVED_PEBS; + ubi->avail_pebs -= reserved_pebs; + ubi->rsvd_pebs += reserved_pebs; /* Schedule wear-leveling if needed */ - err = ensure_wear_leveling(ubi); + err = ensure_wear_leveling(ubi, 0); if (err) goto out_free; @@ -1549,72 +2011,57 @@ out_free: } /** - * protection_trees_destroy - destroy the protection RB-trees. + * protection_queue_destroy - destroy the protection queue. * @ubi: UBI device description object */ -static void protection_trees_destroy(struct ubi_device *ubi) +static void protection_queue_destroy(struct ubi_device *ubi) { - struct rb_node *rb; - struct ubi_wl_prot_entry *pe; + int i; + struct ubi_wl_entry *e, *tmp; - rb = ubi->prot.aec.rb_node; - while (rb) { - if (rb->rb_left) - rb = rb->rb_left; - else if (rb->rb_right) - rb = rb->rb_right; - else { - pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec); - - rb = rb_parent(rb); - if (rb) { - if (rb->rb_left == &pe->rb_aec) - rb->rb_left = NULL; - else - rb->rb_right = NULL; - } - - kmem_cache_free(ubi_wl_entry_slab, pe->e); - kfree(pe); + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { + list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { + list_del(&e->u.list); + kmem_cache_free(ubi_wl_entry_slab, e); } } } /** - * ubi_wl_close - close the wear-leveling unit. + * ubi_wl_close - close the wear-leveling sub-system. * @ubi: UBI device description object */ void ubi_wl_close(struct ubi_device *ubi) { - dbg_wl("close the UBI wear-leveling unit"); - + dbg_wl("close the WL sub-system"); cancel_pending(ubi); - protection_trees_destroy(ubi); + protection_queue_destroy(ubi); tree_destroy(&ubi->used); + tree_destroy(&ubi->erroneous); tree_destroy(&ubi->free); tree_destroy(&ubi->scrub); kfree(ubi->lookuptbl); } -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID - /** - * paranoid_check_ec - make sure that the erase counter of a physical eraseblock - * is correct. + * self_check_ec - make sure that the erase counter of a PEB is correct. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * @ec: the erase counter to check * * This function returns zero if the erase counter of physical eraseblock @pnum - * is equivalent to @ec, %1 if not, and a negative error code if an error + * is equivalent to @ec, and a negative error code if not or if an error * occurred. */ -static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) +static int self_check_ec(struct ubi_device *ubi, int pnum, int ec) { int err; long long read_ec; struct ubi_ec_hdr *ec_hdr; + if (!ubi_dbg_chk_gen(ubi)) + return 0; + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); if (!ec_hdr) return -ENOMEM; @@ -1627,10 +2074,10 @@ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) } read_ec = be64_to_cpu(ec_hdr->ec); - if (ec != read_ec) { - ubi_err("paranoid check failed for PEB %d", pnum); + if (ec != read_ec && read_ec - ec > 1) { + ubi_err("self-check failed for PEB %d", pnum); ubi_err("read EC is %lld, should be %d", read_ec, ec); - ubi_dbg_dump_stack(); + dump_stack(); err = 1; } else err = 0; @@ -1641,24 +2088,53 @@ out_free: } /** - * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present - * in a WL RB-tree. + * self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. + * @ubi: UBI device description object * @e: the wear-leveling entry to check * @root: the root of the tree * - * This function returns zero if @e is in the @root RB-tree and %1 if it + * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it * is not. */ -static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, - struct rb_root *root) +static int self_check_in_wl_tree(const struct ubi_device *ubi, + struct ubi_wl_entry *e, struct rb_root *root) { + if (!ubi_dbg_chk_gen(ubi)) + return 0; + if (in_wl_tree(e, root)) return 0; - ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ", + ubi_err("self-check failed for PEB %d, EC %d, RB-tree %p ", e->pnum, e->ec, root); - ubi_dbg_dump_stack(); - return 1; + dump_stack(); + return -EINVAL; } -#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ +/** + * self_check_in_pq - check if wear-leveling entry is in the protection + * queue. + * @ubi: UBI device description object + * @e: the wear-leveling entry to check + * + * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. + */ +static int self_check_in_pq(const struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + struct ubi_wl_entry *p; + int i; + + if (!ubi_dbg_chk_gen(ubi)) + return 0; + + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) + list_for_each_entry(p, &ubi->pq[i], u.list) + if (p == e) + return 0; + + ubi_err("self-check failed for PEB %d, EC %d, Protect queue", + e->pnum, e->ec); + dump_stack(); + return -EINVAL; +} diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 85377ea..9ed4017 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) @@ -31,32 +20,171 @@ */ #include "ubifs.h" +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/writeback.h> +#else +#include <linux/err.h> +#endif #include <linux/math64.h> +/* + * When pessimistic budget calculations say that there is no enough space, + * UBIFS starts writing back dirty inodes and pages, doing garbage collection, + * or committing. The below constant defines maximum number of times UBIFS + * repeats the operations. + */ +#define MAX_MKSPC_RETRIES 3 + +/* + * The below constant defines amount of dirty pages which should be written + * back at when trying to shrink the liability. + */ +#define NR_TO_WRITE 16 + +#ifndef __UBOOT__ +/** + * shrink_liability - write-back some dirty pages/inodes. + * @c: UBIFS file-system description object + * @nr_to_write: how many dirty pages to write-back + * + * This function shrinks UBIFS liability by means of writing back some amount + * of dirty inodes and their pages. + * + * Note, this function synchronizes even VFS inodes which are locked + * (@i_mutex) by the caller of the budgeting function, because write-back does + * not touch @i_mutex. + */ +static void shrink_liability(struct ubifs_info *c, int nr_to_write) +{ + down_read(&c->vfs_sb->s_umount); + writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE); + up_read(&c->vfs_sb->s_umount); +} + +/** + * run_gc - run garbage collector. + * @c: UBIFS file-system description object + * + * This function runs garbage collector to make some more free space. Returns + * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a + * negative error code in case of failure. + */ +static int run_gc(struct ubifs_info *c) +{ + int err, lnum; + + /* Make some free space by garbage-collecting dirty space */ + down_read(&c->commit_sem); + lnum = ubifs_garbage_collect(c, 1); + up_read(&c->commit_sem); + if (lnum < 0) + return lnum; + + /* GC freed one LEB, return it to lprops */ + dbg_budg("GC freed LEB %d", lnum); + err = ubifs_return_leb(c, lnum); + if (err) + return err; + return 0; +} + /** - * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. + * get_liability - calculate current liability. * @c: UBIFS file-system description object * - * This function calculates and returns the number of eraseblocks which should - * be kept for index usage. + * This function calculates and returns current UBIFS liability, i.e. the + * amount of bytes UBIFS has "promised" to write to the media. + */ +static long long get_liability(struct ubifs_info *c) +{ + long long liab; + + spin_lock(&c->space_lock); + liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth; + spin_unlock(&c->space_lock); + return liab; +} + +/** + * make_free_space - make more free space on the file-system. + * @c: UBIFS file-system description object + * + * This function is called when an operation cannot be budgeted because there + * is supposedly no free space. But in most cases there is some free space: + * o budgeting is pessimistic, so it always budgets more than it is actually + * needed, so shrinking the liability is one way to make free space - the + * cached data will take less space then it was budgeted for; + * o GC may turn some dark space into free space (budgeting treats dark space + * as not available); + * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs. + * + * So this function tries to do the above. Returns %-EAGAIN if some free space + * was presumably made and the caller has to re-try budgeting the operation. + * Returns %-ENOSPC if it couldn't do more free space, and other negative error + * codes on failures. + */ +static int make_free_space(struct ubifs_info *c) +{ + int err, retries = 0; + long long liab1, liab2; + + do { + liab1 = get_liability(c); + /* + * We probably have some dirty pages or inodes (liability), try + * to write them back. + */ + dbg_budg("liability %lld, run write-back", liab1); + shrink_liability(c, NR_TO_WRITE); + + liab2 = get_liability(c); + if (liab2 < liab1) + return -EAGAIN; + + dbg_budg("new liability %lld (not shrunk)", liab2); + + /* Liability did not shrink again, try GC */ + dbg_budg("Run GC"); + err = run_gc(c); + if (!err) + return -EAGAIN; + + if (err != -EAGAIN && err != -ENOSPC) + /* Some real error happened */ + return err; + + dbg_budg("Run commit (retries %d)", retries); + err = ubifs_run_commit(c); + if (err) + return err; + } while (retries++ < MAX_MKSPC_RETRIES); + + return -ENOSPC; +} +#endif + +/** + * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index. + * @c: UBIFS file-system description object + * + * This function calculates and returns the number of LEBs which should be kept + * for index usage. */ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) { - int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; + int idx_lebs; long long idx_size; - idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; - + idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx; /* And make sure we have thrice the index size of space reserved */ - idx_size = idx_size + (idx_size << 1); - + idx_size += idx_size << 1; /* * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' * pair, nor similarly the two variables for the new index size, so we * have to do this costly 64-bit division on fast-path. */ - idx_size += eff_leb_size - 1; - idx_lebs = div_u64(idx_size, eff_leb_size); + idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size); /* * The index head is not available for the in-the-gaps method, so add an * extra LEB to compensate. @@ -67,6 +195,424 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) return idx_lebs; } +#ifndef __UBOOT__ +/** + * ubifs_calc_available - calculate available FS space. + * @c: UBIFS file-system description object + * @min_idx_lebs: minimum number of LEBs reserved for the index + * + * This function calculates and returns amount of FS space available for use. + */ +long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) +{ + int subtract_lebs; + long long available; + + available = c->main_bytes - c->lst.total_used; + + /* + * Now 'available' contains theoretically available flash space + * assuming there is no index, so we have to subtract the space which + * is reserved for the index. + */ + subtract_lebs = min_idx_lebs; + + /* Take into account that GC reserves one LEB for its own needs */ + subtract_lebs += 1; + + /* + * The GC journal head LEB is not really accessible. And since + * different write types go to different heads, we may count only on + * one head's space. + */ + subtract_lebs += c->jhead_cnt - 1; + + /* We also reserve one LEB for deletions, which bypass budgeting */ + subtract_lebs += 1; + + available -= (long long)subtract_lebs * c->leb_size; + + /* Subtract the dead space which is not available for use */ + available -= c->lst.total_dead; + + /* + * Subtract dark space, which might or might not be usable - it depends + * on the data which we have on the media and which will be written. If + * this is a lot of uncompressed or not-compressible data, the dark + * space cannot be used. + */ + available -= c->lst.total_dark; + + /* + * However, there is more dark space. The index may be bigger than + * @min_idx_lebs. Those extra LEBs are assumed to be available, but + * their dark space is not included in total_dark, so it is subtracted + * here. + */ + if (c->lst.idx_lebs > min_idx_lebs) { + subtract_lebs = c->lst.idx_lebs - min_idx_lebs; + available -= subtract_lebs * c->dark_wm; + } + + /* The calculations are rough and may end up with a negative number */ + return available > 0 ? available : 0; +} + +/** + * can_use_rp - check whether the user is allowed to use reserved pool. + * @c: UBIFS file-system description object + * + * UBIFS has so-called "reserved pool" which is flash space reserved + * for the superuser and for uses whose UID/GID is recorded in UBIFS superblock. + * This function checks whether current user is allowed to use reserved pool. + * Returns %1 current user is allowed to use reserved pool and %0 otherwise. + */ +static int can_use_rp(struct ubifs_info *c) +{ + if (uid_eq(current_fsuid(), c->rp_uid) || capable(CAP_SYS_RESOURCE) || + (!gid_eq(c->rp_gid, GLOBAL_ROOT_GID) && in_group_p(c->rp_gid))) + return 1; + return 0; +} + +/** + * do_budget_space - reserve flash space for index and data growth. + * @c: UBIFS file-system description object + * + * This function makes sure UBIFS has enough free LEBs for index growth and + * data. + * + * When budgeting index space, UBIFS reserves thrice as many LEBs as the index + * would take if it was consolidated and written to the flash. This guarantees + * that the "in-the-gaps" commit method always succeeds and UBIFS will always + * be able to commit dirty index. So this function basically adds amount of + * budgeted index space to the size of the current index, multiplies this by 3, + * and makes sure this does not exceed the amount of free LEBs. + * + * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables: + * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might + * be large, because UBIFS does not do any index consolidation as long as + * there is free space. IOW, the index may take a lot of LEBs, but the LEBs + * will contain a lot of dirt. + * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW, + * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs. + * + * This function returns zero in case of success, and %-ENOSPC in case of + * failure. + */ +static int do_budget_space(struct ubifs_info *c) +{ + long long outstanding, available; + int lebs, rsvd_idx_lebs, min_idx_lebs; + + /* First budget index space */ + min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + /* Now 'min_idx_lebs' contains number of LEBs to reserve */ + if (min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + + /* + * The number of LEBs that are available to be used by the index is: + * + * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - + * @c->lst.taken_empty_lebs + * + * @c->lst.empty_lebs are available because they are empty. + * @c->freeable_cnt are available because they contain only free and + * dirty space, @c->idx_gc_cnt are available because they are index + * LEBs that have been garbage collected and are awaiting the commit + * before they can be used. And the in-the-gaps method will grab these + * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have + * already been allocated for some purpose. + * + * Note, @c->idx_gc_cnt is included to both @c->lst.empty_lebs (because + * these LEBs are empty) and to @c->lst.taken_empty_lebs (because they + * are taken until after the commit). + * + * Note, @c->lst.taken_empty_lebs may temporarily be higher by one + * because of the way we serialize LEB allocations and budgeting. See a + * comment in 'ubifs_find_free_space()'. + */ + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - + c->lst.taken_empty_lebs; + if (unlikely(rsvd_idx_lebs > lebs)) { + dbg_budg("out of indexing space: min_idx_lebs %d (old %d), rsvd_idx_lebs %d", + min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs); + return -ENOSPC; + } + + available = ubifs_calc_available(c, min_idx_lebs); + outstanding = c->bi.data_growth + c->bi.dd_growth; + + if (unlikely(available < outstanding)) { + dbg_budg("out of data space: available %lld, outstanding %lld", + available, outstanding); + return -ENOSPC; + } + + if (available - outstanding <= c->rp_size && !can_use_rp(c)) + return -ENOSPC; + + c->bi.min_idx_lebs = min_idx_lebs; + return 0; +} + +/** + * calc_idx_growth - calculate approximate index growth from budgeting request. + * @c: UBIFS file-system description object + * @req: budgeting request + * + * For now we assume each new node adds one znode. But this is rather poor + * approximation, though. + */ +static int calc_idx_growth(const struct ubifs_info *c, + const struct ubifs_budget_req *req) +{ + int znodes; + + znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) + + req->new_dent; + return znodes * c->max_idx_node_sz; +} + +/** + * calc_data_growth - calculate approximate amount of new data from budgeting + * request. + * @c: UBIFS file-system description object + * @req: budgeting request + */ +static int calc_data_growth(const struct ubifs_info *c, + const struct ubifs_budget_req *req) +{ + int data_growth; + + data_growth = req->new_ino ? c->bi.inode_budget : 0; + if (req->new_page) + data_growth += c->bi.page_budget; + if (req->new_dent) + data_growth += c->bi.dent_budget; + data_growth += req->new_ino_d; + return data_growth; +} + +/** + * calc_dd_growth - calculate approximate amount of data which makes other data + * dirty from budgeting request. + * @c: UBIFS file-system description object + * @req: budgeting request + */ +static int calc_dd_growth(const struct ubifs_info *c, + const struct ubifs_budget_req *req) +{ + int dd_growth; + + dd_growth = req->dirtied_page ? c->bi.page_budget : 0; + + if (req->dirtied_ino) + dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); + if (req->mod_dent) + dd_growth += c->bi.dent_budget; + dd_growth += req->dirtied_ino_d; + return dd_growth; +} + +/** + * ubifs_budget_space - ensure there is enough space to complete an operation. + * @c: UBIFS file-system description object + * @req: budget request + * + * This function allocates budget for an operation. It uses pessimistic + * approximation of how much flash space the operation needs. The goal of this + * function is to make sure UBIFS always has flash space to flush all dirty + * pages, dirty inodes, and dirty znodes (liability). This function may force + * commit, garbage-collection or write-back. Returns zero in case of success, + * %-ENOSPC if there is no free space and other negative error codes in case of + * failures. + */ +int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) +{ + int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); + int err, idx_growth, data_growth, dd_growth, retried = 0; + + ubifs_assert(req->new_page <= 1); + ubifs_assert(req->dirtied_page <= 1); + ubifs_assert(req->new_dent <= 1); + ubifs_assert(req->mod_dent <= 1); + ubifs_assert(req->new_ino <= 1); + ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); + ubifs_assert(req->dirtied_ino <= 4); + ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); + ubifs_assert(!(req->new_ino_d & 7)); + ubifs_assert(!(req->dirtied_ino_d & 7)); + + data_growth = calc_data_growth(c, req); + dd_growth = calc_dd_growth(c, req); + if (!data_growth && !dd_growth) + return 0; + idx_growth = calc_idx_growth(c, req); + +again: + spin_lock(&c->space_lock); + ubifs_assert(c->bi.idx_growth >= 0); + ubifs_assert(c->bi.data_growth >= 0); + ubifs_assert(c->bi.dd_growth >= 0); + + if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) { + dbg_budg("no space"); + spin_unlock(&c->space_lock); + return -ENOSPC; + } + + c->bi.idx_growth += idx_growth; + c->bi.data_growth += data_growth; + c->bi.dd_growth += dd_growth; + + err = do_budget_space(c); + if (likely(!err)) { + req->idx_growth = idx_growth; + req->data_growth = data_growth; + req->dd_growth = dd_growth; + spin_unlock(&c->space_lock); + return 0; + } + + /* Restore the old values */ + c->bi.idx_growth -= idx_growth; + c->bi.data_growth -= data_growth; + c->bi.dd_growth -= dd_growth; + spin_unlock(&c->space_lock); + + if (req->fast) { + dbg_budg("no space for fast budgeting"); + return err; + } + + err = make_free_space(c); + cond_resched(); + if (err == -EAGAIN) { + dbg_budg("try again"); + goto again; + } else if (err == -ENOSPC) { + if (!retried) { + retried = 1; + dbg_budg("-ENOSPC, but anyway try once again"); + goto again; + } + dbg_budg("FS is full, -ENOSPC"); + c->bi.nospace = 1; + if (can_use_rp(c) || c->rp_size == 0) + c->bi.nospace_rp = 1; + smp_wmb(); + } else + ubifs_err("cannot budget space, error %d", err); + return err; +} + +/** + * ubifs_release_budget - release budgeted free space. + * @c: UBIFS file-system description object + * @req: budget request + * + * This function releases the space budgeted by 'ubifs_budget_space()'. Note, + * since the index changes (which were budgeted for in @req->idx_growth) will + * only be written to the media on commit, this function moves the index budget + * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed + * by the commit operation. + */ +void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) +{ + ubifs_assert(req->new_page <= 1); + ubifs_assert(req->dirtied_page <= 1); + ubifs_assert(req->new_dent <= 1); + ubifs_assert(req->mod_dent <= 1); + ubifs_assert(req->new_ino <= 1); + ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); + ubifs_assert(req->dirtied_ino <= 4); + ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); + ubifs_assert(!(req->new_ino_d & 7)); + ubifs_assert(!(req->dirtied_ino_d & 7)); + if (!req->recalculate) { + ubifs_assert(req->idx_growth >= 0); + ubifs_assert(req->data_growth >= 0); + ubifs_assert(req->dd_growth >= 0); + } + + if (req->recalculate) { + req->data_growth = calc_data_growth(c, req); + req->dd_growth = calc_dd_growth(c, req); + req->idx_growth = calc_idx_growth(c, req); + } + + if (!req->data_growth && !req->dd_growth) + return; + + c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + + spin_lock(&c->space_lock); + c->bi.idx_growth -= req->idx_growth; + c->bi.uncommitted_idx += req->idx_growth; + c->bi.data_growth -= req->data_growth; + c->bi.dd_growth -= req->dd_growth; + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + ubifs_assert(c->bi.idx_growth >= 0); + ubifs_assert(c->bi.data_growth >= 0); + ubifs_assert(c->bi.dd_growth >= 0); + ubifs_assert(c->bi.min_idx_lebs < c->main_lebs); + ubifs_assert(!(c->bi.idx_growth & 7)); + ubifs_assert(!(c->bi.data_growth & 7)); + ubifs_assert(!(c->bi.dd_growth & 7)); + spin_unlock(&c->space_lock); +} + +/** + * ubifs_convert_page_budget - convert budget of a new page. + * @c: UBIFS file-system description object + * + * This function converts budget which was allocated for a new page of data to + * the budget of changing an existing page of data. The latter is smaller than + * the former, so this function only does simple re-calculation and does not + * involve any write-back. + */ +void ubifs_convert_page_budget(struct ubifs_info *c) +{ + spin_lock(&c->space_lock); + /* Release the index growth reservation */ + c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; + /* Release the data growth reservation */ + c->bi.data_growth -= c->bi.page_budget; + /* Increase the dirty data growth reservation instead */ + c->bi.dd_growth += c->bi.page_budget; + /* And re-calculate the indexing space reservation */ + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + spin_unlock(&c->space_lock); +} + +/** + * ubifs_release_dirty_inode_budget - release dirty inode budget. + * @c: UBIFS file-system description object + * @ui: UBIFS inode to release the budget for + * + * This function releases budget corresponding to a dirty inode. It is usually + * called when after the inode has been written to the media and marked as + * clean. It also causes the "no space" flags to be cleared. + */ +void ubifs_release_dirty_inode_budget(struct ubifs_info *c, + struct ubifs_inode *ui) +{ + struct ubifs_budget_req req; + + memset(&req, 0, sizeof(struct ubifs_budget_req)); + /* The "no space" flags will be cleared because dd_growth is > 0 */ + req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8); + ubifs_release_budget(c, &req); +} +#endif + /** * ubifs_reported_space - calculate reported free space. * @c: the UBIFS file-system description object @@ -111,3 +657,75 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) free *= factor; return div_u64(free, divisor); } + +#ifndef __UBOOT__ +/** + * ubifs_get_free_space_nolock - return amount of free space. + * @c: UBIFS file-system description object + * + * This function calculates amount of free space to report to user-space. + * + * Because UBIFS may introduce substantial overhead (the index, node headers, + * alignment, wastage at the end of LEBs, etc), it cannot report real amount of + * free flash space it has (well, because not all dirty space is reclaimable, + * UBIFS does not actually know the real amount). If UBIFS did so, it would + * bread user expectations about what free space is. Users seem to accustomed + * to assume that if the file-system reports N bytes of free space, they would + * be able to fit a file of N bytes to the FS. This almost works for + * traditional file-systems, because they have way less overhead than UBIFS. + * So, to keep users happy, UBIFS tries to take the overhead into account. + */ +long long ubifs_get_free_space_nolock(struct ubifs_info *c) +{ + int rsvd_idx_lebs, lebs; + long long available, outstanding, free; + + ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); + outstanding = c->bi.data_growth + c->bi.dd_growth; + available = ubifs_calc_available(c, c->bi.min_idx_lebs); + + /* + * When reporting free space to user-space, UBIFS guarantees that it is + * possible to write a file of free space size. This means that for + * empty LEBs we may use more precise calculations than + * 'ubifs_calc_available()' is using. Namely, we know that in empty + * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. + * Thus, amend the available space. + * + * Note, the calculations below are similar to what we have in + * 'do_budget_space()', so refer there for comments. + */ + if (c->bi.min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - + c->lst.taken_empty_lebs; + lebs -= rsvd_idx_lebs; + available += lebs * (c->dark_wm - c->leb_overhead); + + if (available > outstanding) + free = ubifs_reported_space(c, available - outstanding); + else + free = 0; + return free; +} + +/** + * ubifs_get_free_space - return amount of free space. + * @c: UBIFS file-system description object + * + * This function calculates and returns amount of free space to report to + * user-space. + */ +long long ubifs_get_free_space(struct ubifs_info *c) +{ + long long free; + + spin_lock(&c->space_lock); + free = ubifs_get_free_space_nolock(c); + spin_unlock(&c->space_lock); + + return free; +} +#endif diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 6afb883..2f50a55 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -27,28 +16,44 @@ * various local functions of those subsystems. */ -#define UBIFS_DBG_PRESERVE_UBI - +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/math64.h> +#include <linux/uaccess.h> +#include <linux/random.h> +#else +#include <linux/compat.h> +#include <linux/err.h> +#endif #include "ubifs.h" -#ifdef CONFIG_UBIFS_FS_DEBUG - -DEFINE_SPINLOCK(dbg_lock); +#ifndef __UBOOT__ +static DEFINE_SPINLOCK(dbg_lock); +#endif -static char dbg_key_buf0[128]; -static char dbg_key_buf1[128]; - -unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; -unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; -unsigned int ubifs_tst_flags; - -module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); -module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); -module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); +static const char *get_key_fmt(int fmt) +{ + switch (fmt) { + case UBIFS_SIMPLE_KEY_FMT: + return "simple"; + default: + return "unknown/invalid format"; + } +} -MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); -MODULE_PARM_DESC(debug_chks, "Debug check flags"); -MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); +static const char *get_key_hash(int hash) +{ + switch (hash) { + case UBIFS_KEY_HASH_R5: + return "R5"; + case UBIFS_KEY_HASH_TEST: + return "test"; + default: + return "unknown/invalid name hash"; + } +} static const char *get_key_type(int type) { @@ -68,8 +73,32 @@ static const char *get_key_type(int type) } } -static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, - char *buffer) +#ifndef __UBOOT__ +static const char *get_dent_type(int type) +{ + switch (type) { + case UBIFS_ITYPE_REG: + return "file"; + case UBIFS_ITYPE_DIR: + return "dir"; + case UBIFS_ITYPE_LNK: + return "symlink"; + case UBIFS_ITYPE_BLK: + return "blkdev"; + case UBIFS_ITYPE_CHR: + return "char dev"; + case UBIFS_ITYPE_FIFO: + return "fifo"; + case UBIFS_ITYPE_SOCK: + return "socket"; + default: + return "unknown/invalid type"; + } +} +#endif + +const char *dbg_snprintf_key(const struct ubifs_info *c, + const union ubifs_key *key, char *buffer, int len) { char *p = buffer; int type = key_type(c, key); @@ -77,70 +106,3030 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { switch (type) { case UBIFS_INO_KEY: - sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), - get_key_type(type)); + len -= snprintf(p, len, "(%lu, %s)", + (unsigned long)key_inum(c, key), + get_key_type(type)); break; case UBIFS_DENT_KEY: case UBIFS_XENT_KEY: - sprintf(p, "(%lu, %s, %#08x)", - (unsigned long)key_inum(c, key), - get_key_type(type), key_hash(c, key)); + len -= snprintf(p, len, "(%lu, %s, %#08x)", + (unsigned long)key_inum(c, key), + get_key_type(type), key_hash(c, key)); break; case UBIFS_DATA_KEY: - sprintf(p, "(%lu, %s, %u)", - (unsigned long)key_inum(c, key), - get_key_type(type), key_block(c, key)); + len -= snprintf(p, len, "(%lu, %s, %u)", + (unsigned long)key_inum(c, key), + get_key_type(type), key_block(c, key)); break; case UBIFS_TRUN_KEY: - sprintf(p, "(%lu, %s)", - (unsigned long)key_inum(c, key), - get_key_type(type)); + len -= snprintf(p, len, "(%lu, %s)", + (unsigned long)key_inum(c, key), + get_key_type(type)); break; default: - sprintf(p, "(bad key type: %#08x, %#08x)", - key->u32[0], key->u32[1]); + len -= snprintf(p, len, "(bad key type: %#08x, %#08x)", + key->u32[0], key->u32[1]); } } else - sprintf(p, "bad key format %d", c->key_fmt); + len -= snprintf(p, len, "bad key format %d", c->key_fmt); + ubifs_assert(len > 0); + return p; } -const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) +const char *dbg_ntype(int type) { - /* dbg_lock must be held */ - sprintf_key(c, key, dbg_key_buf0); - return dbg_key_buf0; + switch (type) { + case UBIFS_PAD_NODE: + return "padding node"; + case UBIFS_SB_NODE: + return "superblock node"; + case UBIFS_MST_NODE: + return "master node"; + case UBIFS_REF_NODE: + return "reference node"; + case UBIFS_INO_NODE: + return "inode node"; + case UBIFS_DENT_NODE: + return "direntry node"; + case UBIFS_XENT_NODE: + return "xentry node"; + case UBIFS_DATA_NODE: + return "data node"; + case UBIFS_TRUN_NODE: + return "truncate node"; + case UBIFS_IDX_NODE: + return "indexing node"; + case UBIFS_CS_NODE: + return "commit start node"; + case UBIFS_ORPH_NODE: + return "orphan node"; + default: + return "unknown node"; + } } -const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) +static const char *dbg_gtype(int type) { - /* dbg_lock must be held */ - sprintf_key(c, key, dbg_key_buf1); - return dbg_key_buf1; + switch (type) { + case UBIFS_NO_NODE_GROUP: + return "no node group"; + case UBIFS_IN_NODE_GROUP: + return "in node group"; + case UBIFS_LAST_OF_NODE_GROUP: + return "last of node group"; + default: + return "unknown"; + } +} + +const char *dbg_cstate(int cmt_state) +{ + switch (cmt_state) { + case COMMIT_RESTING: + return "commit resting"; + case COMMIT_BACKGROUND: + return "background commit requested"; + case COMMIT_REQUIRED: + return "commit required"; + case COMMIT_RUNNING_BACKGROUND: + return "BACKGROUND commit running"; + case COMMIT_RUNNING_REQUIRED: + return "commit running and required"; + case COMMIT_BROKEN: + return "broken commit"; + default: + return "unknown commit state"; + } +} + +const char *dbg_jhead(int jhead) +{ + switch (jhead) { + case GCHD: + return "0 (GC)"; + case BASEHD: + return "1 (base)"; + case DATAHD: + return "2 (data)"; + default: + return "unknown journal head"; + } +} + +static void dump_ch(const struct ubifs_ch *ch) +{ + pr_err("\tmagic %#x\n", le32_to_cpu(ch->magic)); + pr_err("\tcrc %#x\n", le32_to_cpu(ch->crc)); + pr_err("\tnode_type %d (%s)\n", ch->node_type, + dbg_ntype(ch->node_type)); + pr_err("\tgroup_type %d (%s)\n", ch->group_type, + dbg_gtype(ch->group_type)); + pr_err("\tsqnum %llu\n", + (unsigned long long)le64_to_cpu(ch->sqnum)); + pr_err("\tlen %u\n", le32_to_cpu(ch->len)); +} + +void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) +{ +#ifndef __UBOOT__ + const struct ubifs_inode *ui = ubifs_inode(inode); + struct qstr nm = { .name = NULL }; + union ubifs_key key; + struct ubifs_dent_node *dent, *pdent = NULL; + int count = 2; + + pr_err("Dump in-memory inode:"); + pr_err("\tinode %lu\n", inode->i_ino); + pr_err("\tsize %llu\n", + (unsigned long long)i_size_read(inode)); + pr_err("\tnlink %u\n", inode->i_nlink); + pr_err("\tuid %u\n", (unsigned int)i_uid_read(inode)); + pr_err("\tgid %u\n", (unsigned int)i_gid_read(inode)); + pr_err("\tatime %u.%u\n", + (unsigned int)inode->i_atime.tv_sec, + (unsigned int)inode->i_atime.tv_nsec); + pr_err("\tmtime %u.%u\n", + (unsigned int)inode->i_mtime.tv_sec, + (unsigned int)inode->i_mtime.tv_nsec); + pr_err("\tctime %u.%u\n", + (unsigned int)inode->i_ctime.tv_sec, + (unsigned int)inode->i_ctime.tv_nsec); + pr_err("\tcreat_sqnum %llu\n", ui->creat_sqnum); + pr_err("\txattr_size %u\n", ui->xattr_size); + pr_err("\txattr_cnt %u\n", ui->xattr_cnt); + pr_err("\txattr_names %u\n", ui->xattr_names); + pr_err("\tdirty %u\n", ui->dirty); + pr_err("\txattr %u\n", ui->xattr); + pr_err("\tbulk_read %u\n", ui->xattr); + pr_err("\tsynced_i_size %llu\n", + (unsigned long long)ui->synced_i_size); + pr_err("\tui_size %llu\n", + (unsigned long long)ui->ui_size); + pr_err("\tflags %d\n", ui->flags); + pr_err("\tcompr_type %d\n", ui->compr_type); + pr_err("\tlast_page_read %lu\n", ui->last_page_read); + pr_err("\tread_in_a_row %lu\n", ui->read_in_a_row); + pr_err("\tdata_len %d\n", ui->data_len); + + if (!S_ISDIR(inode->i_mode)) + return; + + pr_err("List of directory entries:\n"); + ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); + + lowest_dent_key(c, &key, inode->i_ino); + while (1) { + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + if (PTR_ERR(dent) != -ENOENT) + pr_err("error %ld\n", PTR_ERR(dent)); + break; + } + + pr_err("\t%d: %s (%s)\n", + count++, dent->name, get_dent_type(dent->type)); + + nm.name = dent->name; + nm.len = le16_to_cpu(dent->nlen); + kfree(pdent); + pdent = dent; + key_read(c, &dent->key, &key); + } + kfree(pdent); +#endif +} + +void ubifs_dump_node(const struct ubifs_info *c, const void *node) +{ + int i, n; + union ubifs_key key; + const struct ubifs_ch *ch = node; + char key_buf[DBG_KEY_BUF_LEN]; + + /* If the magic is incorrect, just hexdump the first bytes */ + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { + pr_err("Not a node, first %zu bytes:", UBIFS_CH_SZ); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, + (void *)node, UBIFS_CH_SZ, 1); + return; + } + + spin_lock(&dbg_lock); + dump_ch(node); + + switch (ch->node_type) { + case UBIFS_PAD_NODE: + { + const struct ubifs_pad_node *pad = node; + + pr_err("\tpad_len %u\n", le32_to_cpu(pad->pad_len)); + break; + } + case UBIFS_SB_NODE: + { + const struct ubifs_sb_node *sup = node; + unsigned int sup_flags = le32_to_cpu(sup->flags); + + pr_err("\tkey_hash %d (%s)\n", + (int)sup->key_hash, get_key_hash(sup->key_hash)); + pr_err("\tkey_fmt %d (%s)\n", + (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); + pr_err("\tflags %#x\n", sup_flags); + pr_err("\t big_lpt %u\n", + !!(sup_flags & UBIFS_FLG_BIGLPT)); + pr_err("\t space_fixup %u\n", + !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); + pr_err("\tmin_io_size %u\n", le32_to_cpu(sup->min_io_size)); + pr_err("\tleb_size %u\n", le32_to_cpu(sup->leb_size)); + pr_err("\tleb_cnt %u\n", le32_to_cpu(sup->leb_cnt)); + pr_err("\tmax_leb_cnt %u\n", le32_to_cpu(sup->max_leb_cnt)); + pr_err("\tmax_bud_bytes %llu\n", + (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); + pr_err("\tlog_lebs %u\n", le32_to_cpu(sup->log_lebs)); + pr_err("\tlpt_lebs %u\n", le32_to_cpu(sup->lpt_lebs)); + pr_err("\torph_lebs %u\n", le32_to_cpu(sup->orph_lebs)); + pr_err("\tjhead_cnt %u\n", le32_to_cpu(sup->jhead_cnt)); + pr_err("\tfanout %u\n", le32_to_cpu(sup->fanout)); + pr_err("\tlsave_cnt %u\n", le32_to_cpu(sup->lsave_cnt)); + pr_err("\tdefault_compr %u\n", + (int)le16_to_cpu(sup->default_compr)); + pr_err("\trp_size %llu\n", + (unsigned long long)le64_to_cpu(sup->rp_size)); + pr_err("\trp_uid %u\n", le32_to_cpu(sup->rp_uid)); + pr_err("\trp_gid %u\n", le32_to_cpu(sup->rp_gid)); + pr_err("\tfmt_version %u\n", le32_to_cpu(sup->fmt_version)); + pr_err("\ttime_gran %u\n", le32_to_cpu(sup->time_gran)); + pr_err("\tUUID %pUB\n", sup->uuid); + break; + } + case UBIFS_MST_NODE: + { + const struct ubifs_mst_node *mst = node; + + pr_err("\thighest_inum %llu\n", + (unsigned long long)le64_to_cpu(mst->highest_inum)); + pr_err("\tcommit number %llu\n", + (unsigned long long)le64_to_cpu(mst->cmt_no)); + pr_err("\tflags %#x\n", le32_to_cpu(mst->flags)); + pr_err("\tlog_lnum %u\n", le32_to_cpu(mst->log_lnum)); + pr_err("\troot_lnum %u\n", le32_to_cpu(mst->root_lnum)); + pr_err("\troot_offs %u\n", le32_to_cpu(mst->root_offs)); + pr_err("\troot_len %u\n", le32_to_cpu(mst->root_len)); + pr_err("\tgc_lnum %u\n", le32_to_cpu(mst->gc_lnum)); + pr_err("\tihead_lnum %u\n", le32_to_cpu(mst->ihead_lnum)); + pr_err("\tihead_offs %u\n", le32_to_cpu(mst->ihead_offs)); + pr_err("\tindex_size %llu\n", + (unsigned long long)le64_to_cpu(mst->index_size)); + pr_err("\tlpt_lnum %u\n", le32_to_cpu(mst->lpt_lnum)); + pr_err("\tlpt_offs %u\n", le32_to_cpu(mst->lpt_offs)); + pr_err("\tnhead_lnum %u\n", le32_to_cpu(mst->nhead_lnum)); + pr_err("\tnhead_offs %u\n", le32_to_cpu(mst->nhead_offs)); + pr_err("\tltab_lnum %u\n", le32_to_cpu(mst->ltab_lnum)); + pr_err("\tltab_offs %u\n", le32_to_cpu(mst->ltab_offs)); + pr_err("\tlsave_lnum %u\n", le32_to_cpu(mst->lsave_lnum)); + pr_err("\tlsave_offs %u\n", le32_to_cpu(mst->lsave_offs)); + pr_err("\tlscan_lnum %u\n", le32_to_cpu(mst->lscan_lnum)); + pr_err("\tleb_cnt %u\n", le32_to_cpu(mst->leb_cnt)); + pr_err("\tempty_lebs %u\n", le32_to_cpu(mst->empty_lebs)); + pr_err("\tidx_lebs %u\n", le32_to_cpu(mst->idx_lebs)); + pr_err("\ttotal_free %llu\n", + (unsigned long long)le64_to_cpu(mst->total_free)); + pr_err("\ttotal_dirty %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dirty)); + pr_err("\ttotal_used %llu\n", + (unsigned long long)le64_to_cpu(mst->total_used)); + pr_err("\ttotal_dead %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dead)); + pr_err("\ttotal_dark %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dark)); + break; + } + case UBIFS_REF_NODE: + { + const struct ubifs_ref_node *ref = node; + + pr_err("\tlnum %u\n", le32_to_cpu(ref->lnum)); + pr_err("\toffs %u\n", le32_to_cpu(ref->offs)); + pr_err("\tjhead %u\n", le32_to_cpu(ref->jhead)); + break; + } + case UBIFS_INO_NODE: + { + const struct ubifs_ino_node *ino = node; + + key_read(c, &ino->key, &key); + pr_err("\tkey %s\n", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); + pr_err("\tcreat_sqnum %llu\n", + (unsigned long long)le64_to_cpu(ino->creat_sqnum)); + pr_err("\tsize %llu\n", + (unsigned long long)le64_to_cpu(ino->size)); + pr_err("\tnlink %u\n", le32_to_cpu(ino->nlink)); + pr_err("\tatime %lld.%u\n", + (long long)le64_to_cpu(ino->atime_sec), + le32_to_cpu(ino->atime_nsec)); + pr_err("\tmtime %lld.%u\n", + (long long)le64_to_cpu(ino->mtime_sec), + le32_to_cpu(ino->mtime_nsec)); + pr_err("\tctime %lld.%u\n", + (long long)le64_to_cpu(ino->ctime_sec), + le32_to_cpu(ino->ctime_nsec)); + pr_err("\tuid %u\n", le32_to_cpu(ino->uid)); + pr_err("\tgid %u\n", le32_to_cpu(ino->gid)); + pr_err("\tmode %u\n", le32_to_cpu(ino->mode)); + pr_err("\tflags %#x\n", le32_to_cpu(ino->flags)); + pr_err("\txattr_cnt %u\n", le32_to_cpu(ino->xattr_cnt)); + pr_err("\txattr_size %u\n", le32_to_cpu(ino->xattr_size)); + pr_err("\txattr_names %u\n", le32_to_cpu(ino->xattr_names)); + pr_err("\tcompr_type %#x\n", + (int)le16_to_cpu(ino->compr_type)); + pr_err("\tdata len %u\n", le32_to_cpu(ino->data_len)); + break; + } + case UBIFS_DENT_NODE: + case UBIFS_XENT_NODE: + { + const struct ubifs_dent_node *dent = node; + int nlen = le16_to_cpu(dent->nlen); + + key_read(c, &dent->key, &key); + pr_err("\tkey %s\n", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); + pr_err("\tinum %llu\n", + (unsigned long long)le64_to_cpu(dent->inum)); + pr_err("\ttype %d\n", (int)dent->type); + pr_err("\tnlen %d\n", nlen); + pr_err("\tname "); + + if (nlen > UBIFS_MAX_NLEN) + pr_err("(bad name length, not printing, bad or corrupted node)"); + else { + for (i = 0; i < nlen && dent->name[i]; i++) + pr_cont("%c", dent->name[i]); + } + pr_cont("\n"); + + break; + } + case UBIFS_DATA_NODE: + { + const struct ubifs_data_node *dn = node; + int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; + + key_read(c, &dn->key, &key); + pr_err("\tkey %s\n", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); + pr_err("\tsize %u\n", le32_to_cpu(dn->size)); + pr_err("\tcompr_typ %d\n", + (int)le16_to_cpu(dn->compr_type)); + pr_err("\tdata size %d\n", dlen); + pr_err("\tdata:\n"); + print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, + (void *)&dn->data, dlen, 0); + break; + } + case UBIFS_TRUN_NODE: + { + const struct ubifs_trun_node *trun = node; + + pr_err("\tinum %u\n", le32_to_cpu(trun->inum)); + pr_err("\told_size %llu\n", + (unsigned long long)le64_to_cpu(trun->old_size)); + pr_err("\tnew_size %llu\n", + (unsigned long long)le64_to_cpu(trun->new_size)); + break; + } + case UBIFS_IDX_NODE: + { + const struct ubifs_idx_node *idx = node; + + n = le16_to_cpu(idx->child_cnt); + pr_err("\tchild_cnt %d\n", n); + pr_err("\tlevel %d\n", (int)le16_to_cpu(idx->level)); + pr_err("\tBranches:\n"); + + for (i = 0; i < n && i < c->fanout - 1; i++) { + const struct ubifs_branch *br; + + br = ubifs_idx_branch(c, idx, i); + key_read(c, &br->key, &key); + pr_err("\t%d: LEB %d:%d len %d key %s\n", + i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), + le32_to_cpu(br->len), + dbg_snprintf_key(c, &key, key_buf, + DBG_KEY_BUF_LEN)); + } + break; + } + case UBIFS_CS_NODE: + break; + case UBIFS_ORPH_NODE: + { + const struct ubifs_orph_node *orph = node; + + pr_err("\tcommit number %llu\n", + (unsigned long long) + le64_to_cpu(orph->cmt_no) & LLONG_MAX); + pr_err("\tlast node flag %llu\n", + (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); + n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; + pr_err("\t%d orphan inode numbers:\n", n); + for (i = 0; i < n; i++) + pr_err("\t ino %llu\n", + (unsigned long long)le64_to_cpu(orph->inos[i])); + break; + } + default: + pr_err("node type %d was not recognized\n", + (int)ch->node_type); + } + spin_unlock(&dbg_lock); +} + +void ubifs_dump_budget_req(const struct ubifs_budget_req *req) +{ + spin_lock(&dbg_lock); + pr_err("Budgeting request: new_ino %d, dirtied_ino %d\n", + req->new_ino, req->dirtied_ino); + pr_err("\tnew_ino_d %d, dirtied_ino_d %d\n", + req->new_ino_d, req->dirtied_ino_d); + pr_err("\tnew_page %d, dirtied_page %d\n", + req->new_page, req->dirtied_page); + pr_err("\tnew_dent %d, mod_dent %d\n", + req->new_dent, req->mod_dent); + pr_err("\tidx_growth %d\n", req->idx_growth); + pr_err("\tdata_growth %d dd_growth %d\n", + req->data_growth, req->dd_growth); + spin_unlock(&dbg_lock); +} + +void ubifs_dump_lstats(const struct ubifs_lp_stats *lst) +{ + spin_lock(&dbg_lock); + pr_err("(pid %d) Lprops statistics: empty_lebs %d, idx_lebs %d\n", + current->pid, lst->empty_lebs, lst->idx_lebs); + pr_err("\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n", + lst->taken_empty_lebs, lst->total_free, lst->total_dirty); + pr_err("\ttotal_used %lld, total_dark %lld, total_dead %lld\n", + lst->total_used, lst->total_dark, lst->total_dead); + spin_unlock(&dbg_lock); +} + +#ifndef __UBOOT__ +void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) +{ + int i; + struct rb_node *rb; + struct ubifs_bud *bud; + struct ubifs_gced_idx_leb *idx_gc; + long long available, outstanding, free; + + spin_lock(&c->space_lock); + spin_lock(&dbg_lock); + pr_err("(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n", + current->pid, bi->data_growth + bi->dd_growth, + bi->data_growth + bi->dd_growth + bi->idx_growth); + pr_err("\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n", + bi->data_growth, bi->dd_growth, bi->idx_growth); + pr_err("\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n", + bi->min_idx_lebs, bi->old_idx_sz, bi->uncommitted_idx); + pr_err("\tpage_budget %d, inode_budget %d, dent_budget %d\n", + bi->page_budget, bi->inode_budget, bi->dent_budget); + pr_err("\tnospace %u, nospace_rp %u\n", bi->nospace, bi->nospace_rp); + pr_err("\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", + c->dark_wm, c->dead_wm, c->max_idx_node_sz); + + if (bi != &c->bi) + /* + * If we are dumping saved budgeting data, do not print + * additional information which is about the current state, not + * the old one which corresponded to the saved budgeting data. + */ + goto out_unlock; + + pr_err("\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", + c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); + pr_err("\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n", + atomic_long_read(&c->dirty_pg_cnt), + atomic_long_read(&c->dirty_zn_cnt), + atomic_long_read(&c->clean_zn_cnt)); + pr_err("\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum); + + /* If we are in R/O mode, journal heads do not exist */ + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) + pr_err("\tjhead %s\t LEB %d\n", + dbg_jhead(c->jheads[i].wbuf.jhead), + c->jheads[i].wbuf.lnum); + for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { + bud = rb_entry(rb, struct ubifs_bud, rb); + pr_err("\tbud LEB %d\n", bud->lnum); + } + list_for_each_entry(bud, &c->old_buds, list) + pr_err("\told bud LEB %d\n", bud->lnum); + list_for_each_entry(idx_gc, &c->idx_gc, list) + pr_err("\tGC'ed idx LEB %d unmap %d\n", + idx_gc->lnum, idx_gc->unmap); + pr_err("\tcommit state %d\n", c->cmt_state); + + /* Print budgeting predictions */ + available = ubifs_calc_available(c, c->bi.min_idx_lebs); + outstanding = c->bi.data_growth + c->bi.dd_growth; + free = ubifs_get_free_space_nolock(c); + pr_err("Budgeting predictions:\n"); + pr_err("\tavailable: %lld, outstanding %lld, free %lld\n", + available, outstanding, free); +out_unlock: + spin_unlock(&dbg_lock); + spin_unlock(&c->space_lock); +} +#else +void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) +{ +} +#endif + +void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) +{ + int i, spc, dark = 0, dead = 0; + struct rb_node *rb; + struct ubifs_bud *bud; + + spc = lp->free + lp->dirty; + if (spc < c->dead_wm) + dead = spc; + else + dark = ubifs_calc_dark(c, spc); + + if (lp->flags & LPROPS_INDEX) + pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (", + lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, + lp->flags); + else + pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (", + lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, + dark, dead, (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); + + if (lp->flags & LPROPS_TAKEN) { + if (lp->flags & LPROPS_INDEX) + pr_cont("index, taken"); + else + pr_cont("taken"); + } else { + const char *s; + + if (lp->flags & LPROPS_INDEX) { + switch (lp->flags & LPROPS_CAT_MASK) { + case LPROPS_DIRTY_IDX: + s = "dirty index"; + break; + case LPROPS_FRDI_IDX: + s = "freeable index"; + break; + default: + s = "index"; + } + } else { + switch (lp->flags & LPROPS_CAT_MASK) { + case LPROPS_UNCAT: + s = "not categorized"; + break; + case LPROPS_DIRTY: + s = "dirty"; + break; + case LPROPS_FREE: + s = "free"; + break; + case LPROPS_EMPTY: + s = "empty"; + break; + case LPROPS_FREEABLE: + s = "freeable"; + break; + default: + s = NULL; + break; + } + } + pr_cont("%s", s); + } + + for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) { + bud = rb_entry(rb, struct ubifs_bud, rb); + if (bud->lnum == lp->lnum) { + int head = 0; + for (i = 0; i < c->jhead_cnt; i++) { + /* + * Note, if we are in R/O mode or in the middle + * of mounting/re-mounting, the write-buffers do + * not exist. + */ + if (c->jheads && + lp->lnum == c->jheads[i].wbuf.lnum) { + pr_cont(", jhead %s", dbg_jhead(i)); + head = 1; + } + } + if (!head) + pr_cont(", bud of jhead %s", + dbg_jhead(bud->jhead)); + } + } + if (lp->lnum == c->gc_lnum) + pr_cont(", GC LEB"); + pr_cont(")\n"); +} + +void ubifs_dump_lprops(struct ubifs_info *c) +{ + int lnum, err; + struct ubifs_lprops lp; + struct ubifs_lp_stats lst; + + pr_err("(pid %d) start dumping LEB properties\n", current->pid); + ubifs_get_lp_stats(c, &lst); + ubifs_dump_lstats(&lst); + + for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { + err = ubifs_read_one_lp(c, lnum, &lp); + if (err) + ubifs_err("cannot read lprops for LEB %d", lnum); + + ubifs_dump_lprop(c, &lp); + } + pr_err("(pid %d) finish dumping LEB properties\n", current->pid); +} + +void ubifs_dump_lpt_info(struct ubifs_info *c) +{ + int i; + + spin_lock(&dbg_lock); + pr_err("(pid %d) dumping LPT information\n", current->pid); + pr_err("\tlpt_sz: %lld\n", c->lpt_sz); + pr_err("\tpnode_sz: %d\n", c->pnode_sz); + pr_err("\tnnode_sz: %d\n", c->nnode_sz); + pr_err("\tltab_sz: %d\n", c->ltab_sz); + pr_err("\tlsave_sz: %d\n", c->lsave_sz); + pr_err("\tbig_lpt: %d\n", c->big_lpt); + pr_err("\tlpt_hght: %d\n", c->lpt_hght); + pr_err("\tpnode_cnt: %d\n", c->pnode_cnt); + pr_err("\tnnode_cnt: %d\n", c->nnode_cnt); + pr_err("\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); + pr_err("\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); + pr_err("\tlsave_cnt: %d\n", c->lsave_cnt); + pr_err("\tspace_bits: %d\n", c->space_bits); + pr_err("\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); + pr_err("\tlpt_offs_bits: %d\n", c->lpt_offs_bits); + pr_err("\tlpt_spc_bits: %d\n", c->lpt_spc_bits); + pr_err("\tpcnt_bits: %d\n", c->pcnt_bits); + pr_err("\tlnum_bits: %d\n", c->lnum_bits); + pr_err("\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); + pr_err("\tLPT head is at %d:%d\n", + c->nhead_lnum, c->nhead_offs); + pr_err("\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) + pr_err("\tLPT lsave is at %d:%d\n", + c->lsave_lnum, c->lsave_offs); + for (i = 0; i < c->lpt_lebs; i++) + pr_err("\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n", + i + c->lpt_first, c->ltab[i].free, c->ltab[i].dirty, + c->ltab[i].tgc, c->ltab[i].cmt); + spin_unlock(&dbg_lock); +} + +void ubifs_dump_sleb(const struct ubifs_info *c, + const struct ubifs_scan_leb *sleb, int offs) +{ + struct ubifs_scan_node *snod; + + pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n", + current->pid, sleb->lnum, offs); + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); + pr_err("Dumping node at LEB %d:%d len %d\n", + sleb->lnum, snod->offs, snod->len); + ubifs_dump_node(c, snod->node); + } +} + +void ubifs_dump_leb(const struct ubifs_info *c, int lnum) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + void *buf; + + pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); + + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) { + ubifs_err("cannot allocate memory for dumping LEB %d", lnum); + return; + } + + sleb = ubifs_scan(c, lnum, 0, buf, 0); + if (IS_ERR(sleb)) { + ubifs_err("scan error %d", (int)PTR_ERR(sleb)); + goto out; + } + + pr_err("LEB %d has %d nodes ending at %d\n", lnum, + sleb->nodes_cnt, sleb->endpt); + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); + pr_err("Dumping node at LEB %d:%d len %d\n", lnum, + snod->offs, snod->len); + ubifs_dump_node(c, snod->node); + } + + pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); + ubifs_scan_destroy(sleb); + +out: + vfree(buf); + return; +} + +void ubifs_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode) +{ + int n; + const struct ubifs_zbranch *zbr; + char key_buf[DBG_KEY_BUF_LEN]; + + spin_lock(&dbg_lock); + if (znode->parent) + zbr = &znode->parent->zbranch[znode->iip]; + else + zbr = &c->zroot; + + pr_err("znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n", + znode, zbr->lnum, zbr->offs, zbr->len, znode->parent, znode->iip, + znode->level, znode->child_cnt, znode->flags); + + if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { + spin_unlock(&dbg_lock); + return; + } + + pr_err("zbranches:\n"); + for (n = 0; n < znode->child_cnt; n++) { + zbr = &znode->zbranch[n]; + if (znode->level > 0) + pr_err("\t%d: znode %p LEB %d:%d len %d key %s\n", + n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, + dbg_snprintf_key(c, &zbr->key, key_buf, + DBG_KEY_BUF_LEN)); + else + pr_err("\t%d: LNC %p LEB %d:%d len %d key %s\n", + n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, + dbg_snprintf_key(c, &zbr->key, key_buf, + DBG_KEY_BUF_LEN)); + } + spin_unlock(&dbg_lock); +} + +void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) +{ + int i; + + pr_err("(pid %d) start dumping heap cat %d (%d elements)\n", + current->pid, cat, heap->cnt); + for (i = 0; i < heap->cnt; i++) { + struct ubifs_lprops *lprops = heap->arr[i]; + + pr_err("\t%d. LEB %d hpos %d free %d dirty %d flags %d\n", + i, lprops->lnum, lprops->hpos, lprops->free, + lprops->dirty, lprops->flags); + } + pr_err("(pid %d) finish dumping heap\n", current->pid); +} + +void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip) +{ + int i; + + pr_err("(pid %d) dumping pnode:\n", current->pid); + pr_err("\taddress %zx parent %zx cnext %zx\n", + (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); + pr_err("\tflags %lu iip %d level %d num %d\n", + pnode->flags, iip, pnode->level, pnode->num); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops *lp = &pnode->lprops[i]; + + pr_err("\t%d: free %d dirty %d flags %d lnum %d\n", + i, lp->free, lp->dirty, lp->flags, lp->lnum); + } +} + +void ubifs_dump_tnc(struct ubifs_info *c) +{ + struct ubifs_znode *znode; + int level; + + pr_err("\n"); + pr_err("(pid %d) start dumping TNC tree\n", current->pid); + znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); + level = znode->level; + pr_err("== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; + pr_err("== Level %d ==\n", level); + } + ubifs_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); + } + pr_err("(pid %d) finish dumping TNC tree\n", current->pid); +} + +static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, + void *priv) +{ + ubifs_dump_znode(c, znode); + return 0; } /** - * ubifs_debugging_init - initialize UBIFS debugging. + * ubifs_dump_index - dump the on-flash index. * @c: UBIFS file-system description object * - * This function initializes debugging-related data for the file system. + * This function dumps whole UBIFS indexing B-tree, unlike 'ubifs_dump_tnc()' + * which dumps only in-memory znodes and does not read znodes which from flash. + */ +void ubifs_dump_index(struct ubifs_info *c) +{ + dbg_walk_index(c, NULL, dump_znode, NULL); +} + +#ifndef __UBOOT__ +/** + * dbg_save_space_info - save information about flash space. + * @c: UBIFS file-system description object + * + * This function saves information about UBIFS free space, dirty space, etc, in + * order to check it later. + */ +void dbg_save_space_info(struct ubifs_info *c) +{ + struct ubifs_debug_info *d = c->dbg; + int freeable_cnt; + + spin_lock(&c->space_lock); + memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); + memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info)); + d->saved_idx_gc_cnt = c->idx_gc_cnt; + + /* + * We use a dirty hack here and zero out @c->freeable_cnt, because it + * affects the free space calculations, and UBIFS might not know about + * all freeable eraseblocks. Indeed, we know about freeable eraseblocks + * only when we read their lprops, and we do this only lazily, upon the + * need. So at any given point of time @c->freeable_cnt might be not + * exactly accurate. + * + * Just one example about the issue we hit when we did not zero + * @c->freeable_cnt. + * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the + * amount of free space in @d->saved_free + * 2. We re-mount R/W, which makes UBIFS to read the "lsave" + * information from flash, where we cache LEBs from various + * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()' + * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()' + * -> 'ubifs_get_pnode()' -> 'update_cats()' + * -> 'ubifs_add_to_cat()'). + * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt + * becomes %1. + * 4. We calculate the amount of free space when the re-mount is + * finished in 'dbg_check_space_info()' and it does not match + * @d->saved_free. + */ + freeable_cnt = c->freeable_cnt; + c->freeable_cnt = 0; + d->saved_free = ubifs_get_free_space_nolock(c); + c->freeable_cnt = freeable_cnt; + spin_unlock(&c->space_lock); +} + +/** + * dbg_check_space_info - check flash space information. + * @c: UBIFS file-system description object + * + * This function compares current flash space information with the information + * which was saved when the 'dbg_save_space_info()' function was called. + * Returns zero if the information has not changed, and %-EINVAL it it has + * changed. + */ +int dbg_check_space_info(struct ubifs_info *c) +{ + struct ubifs_debug_info *d = c->dbg; + struct ubifs_lp_stats lst; + long long free; + int freeable_cnt; + + spin_lock(&c->space_lock); + freeable_cnt = c->freeable_cnt; + c->freeable_cnt = 0; + free = ubifs_get_free_space_nolock(c); + c->freeable_cnt = freeable_cnt; + spin_unlock(&c->space_lock); + + if (free != d->saved_free) { + ubifs_err("free space changed from %lld to %lld", + d->saved_free, free); + goto out; + } + + return 0; + +out: + ubifs_msg("saved lprops statistics dump"); + ubifs_dump_lstats(&d->saved_lst); + ubifs_msg("saved budgeting info dump"); + ubifs_dump_budg(c, &d->saved_bi); + ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); + ubifs_msg("current lprops statistics dump"); + ubifs_get_lp_stats(c, &lst); + ubifs_dump_lstats(&lst); + ubifs_msg("current budgeting info dump"); + ubifs_dump_budg(c, &c->bi); + dump_stack(); + return -EINVAL; +} + +/** + * dbg_check_synced_i_size - check synchronized inode size. + * @c: UBIFS file-system description object + * @inode: inode to check + * + * If inode is clean, synchronized inode size has to be equivalent to current + * inode size. This function has to be called only for locked inodes (@i_mutex + * has to be locked). Returns %0 if synchronized inode size if correct, and + * %-EINVAL if not. + */ +int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) +{ + int err = 0; + struct ubifs_inode *ui = ubifs_inode(inode); + + if (!dbg_is_chk_gen(c)) + return 0; + if (!S_ISREG(inode->i_mode)) + return 0; + + mutex_lock(&ui->ui_mutex); + spin_lock(&ui->ui_lock); + if (ui->ui_size != ui->synced_i_size && !ui->dirty) { + ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean", + ui->ui_size, ui->synced_i_size); + ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, + inode->i_mode, i_size_read(inode)); + dump_stack(); + err = -EINVAL; + } + spin_unlock(&ui->ui_lock); + mutex_unlock(&ui->ui_mutex); + return err; +} + +/* + * dbg_check_dir - check directory inode size and link count. + * @c: UBIFS file-system description object + * @dir: the directory to calculate size for + * @size: the result is returned here + * + * This function makes sure that directory size and link count are correct. * Returns zero in case of success and a negative error code in case of * failure. + * + * Note, it is good idea to make sure the @dir->i_mutex is locked before + * calling this function. + */ +int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) +{ + unsigned int nlink = 2; + union ubifs_key key; + struct ubifs_dent_node *dent, *pdent = NULL; + struct qstr nm = { .name = NULL }; + loff_t size = UBIFS_INO_NODE_SZ; + + if (!dbg_is_chk_gen(c)) + return 0; + + if (!S_ISDIR(dir->i_mode)) + return 0; + + lowest_dent_key(c, &key, dir->i_ino); + while (1) { + int err; + + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + if (err == -ENOENT) + break; + return err; + } + + nm.name = dent->name; + nm.len = le16_to_cpu(dent->nlen); + size += CALC_DENT_SIZE(nm.len); + if (dent->type == UBIFS_ITYPE_DIR) + nlink += 1; + kfree(pdent); + pdent = dent; + key_read(c, &dent->key, &key); + } + kfree(pdent); + + if (i_size_read(dir) != size) { + ubifs_err("directory inode %lu has size %llu, but calculated size is %llu", + dir->i_ino, (unsigned long long)i_size_read(dir), + (unsigned long long)size); + ubifs_dump_inode(c, dir); + dump_stack(); + return -EINVAL; + } + if (dir->i_nlink != nlink) { + ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u", + dir->i_ino, dir->i_nlink, nlink); + ubifs_dump_inode(c, dir); + dump_stack(); + return -EINVAL; + } + + return 0; +} + +/** + * dbg_check_key_order - make sure that colliding keys are properly ordered. + * @c: UBIFS file-system description object + * @zbr1: first zbranch + * @zbr2: following zbranch + * + * In UBIFS indexing B-tree colliding keys has to be sorted in binary order of + * names of the direntries/xentries which are referred by the keys. This + * function reads direntries/xentries referred by @zbr1 and @zbr2 and makes + * sure the name of direntry/xentry referred by @zbr1 is less than + * direntry/xentry referred by @zbr2. Returns zero if this is true, %1 if not, + * and a negative error code in case of failure. */ +static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + struct ubifs_zbranch *zbr2) +{ + int err, nlen1, nlen2, cmp; + struct ubifs_dent_node *dent1, *dent2; + union ubifs_key key; + char key_buf[DBG_KEY_BUF_LEN]; + + ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); + dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); + if (!dent1) + return -ENOMEM; + dent2 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); + if (!dent2) { + err = -ENOMEM; + goto out_free; + } + + err = ubifs_tnc_read_node(c, zbr1, dent1); + if (err) + goto out_free; + err = ubifs_validate_entry(c, dent1); + if (err) + goto out_free; + + err = ubifs_tnc_read_node(c, zbr2, dent2); + if (err) + goto out_free; + err = ubifs_validate_entry(c, dent2); + if (err) + goto out_free; + + /* Make sure node keys are the same as in zbranch */ + err = 1; + key_read(c, &dent1->key, &key); + if (keys_cmp(c, &zbr1->key, &key)) { + ubifs_err("1st entry at %d:%d has key %s", zbr1->lnum, + zbr1->offs, dbg_snprintf_key(c, &key, key_buf, + DBG_KEY_BUF_LEN)); + ubifs_err("but it should have key %s according to tnc", + dbg_snprintf_key(c, &zbr1->key, key_buf, + DBG_KEY_BUF_LEN)); + ubifs_dump_node(c, dent1); + goto out_free; + } + + key_read(c, &dent2->key, &key); + if (keys_cmp(c, &zbr2->key, &key)) { + ubifs_err("2nd entry at %d:%d has key %s", zbr1->lnum, + zbr1->offs, dbg_snprintf_key(c, &key, key_buf, + DBG_KEY_BUF_LEN)); + ubifs_err("but it should have key %s according to tnc", + dbg_snprintf_key(c, &zbr2->key, key_buf, + DBG_KEY_BUF_LEN)); + ubifs_dump_node(c, dent2); + goto out_free; + } + + nlen1 = le16_to_cpu(dent1->nlen); + nlen2 = le16_to_cpu(dent2->nlen); + + cmp = memcmp(dent1->name, dent2->name, min_t(int, nlen1, nlen2)); + if (cmp < 0 || (cmp == 0 && nlen1 < nlen2)) { + err = 0; + goto out_free; + } + if (cmp == 0 && nlen1 == nlen2) + ubifs_err("2 xent/dent nodes with the same name"); + else + ubifs_err("bad order of colliding key %s", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); + + ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); + ubifs_dump_node(c, dent1); + ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); + ubifs_dump_node(c, dent2); + +out_free: + kfree(dent2); + kfree(dent1); + return err; +} + +/** + * dbg_check_znode - check if znode is all right. + * @c: UBIFS file-system description object + * @zbr: zbranch which points to this znode + * + * This function makes sure that znode referred to by @zbr is all right. + * Returns zero if it is, and %-EINVAL if it is not. + */ +static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) +{ + struct ubifs_znode *znode = zbr->znode; + struct ubifs_znode *zp = znode->parent; + int n, err, cmp; + + if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { + err = 1; + goto out; + } + if (znode->level < 0) { + err = 2; + goto out; + } + if (znode->iip < 0 || znode->iip >= c->fanout) { + err = 3; + goto out; + } + + if (zbr->len == 0) + /* Only dirty zbranch may have no on-flash nodes */ + if (!ubifs_zn_dirty(znode)) { + err = 4; + goto out; + } + + if (ubifs_zn_dirty(znode)) { + /* + * If znode is dirty, its parent has to be dirty as well. The + * order of the operation is important, so we have to have + * memory barriers. + */ + smp_mb(); + if (zp && !ubifs_zn_dirty(zp)) { + /* + * The dirty flag is atomic and is cleared outside the + * TNC mutex, so znode's dirty flag may now have + * been cleared. The child is always cleared before the + * parent, so we just need to check again. + */ + smp_mb(); + if (ubifs_zn_dirty(znode)) { + err = 5; + goto out; + } + } + } + + if (zp) { + const union ubifs_key *min, *max; + + if (znode->level != zp->level - 1) { + err = 6; + goto out; + } + + /* Make sure the 'parent' pointer in our znode is correct */ + err = ubifs_search_zbranch(c, zp, &zbr->key, &n); + if (!err) { + /* This zbranch does not exist in the parent */ + err = 7; + goto out; + } + + if (znode->iip >= zp->child_cnt) { + err = 8; + goto out; + } + + if (znode->iip != n) { + /* This may happen only in case of collisions */ + if (keys_cmp(c, &zp->zbranch[n].key, + &zp->zbranch[znode->iip].key)) { + err = 9; + goto out; + } + n = znode->iip; + } + + /* + * Make sure that the first key in our znode is greater than or + * equal to the key in the pointing zbranch. + */ + min = &zbr->key; + cmp = keys_cmp(c, min, &znode->zbranch[0].key); + if (cmp == 1) { + err = 10; + goto out; + } + + if (n + 1 < zp->child_cnt) { + max = &zp->zbranch[n + 1].key; + + /* + * Make sure the last key in our znode is less or + * equivalent than the key in the zbranch which goes + * after our pointing zbranch. + */ + cmp = keys_cmp(c, max, + &znode->zbranch[znode->child_cnt - 1].key); + if (cmp == -1) { + err = 11; + goto out; + } + } + } else { + /* This may only be root znode */ + if (zbr != &c->zroot) { + err = 12; + goto out; + } + } + + /* + * Make sure that next key is greater or equivalent then the previous + * one. + */ + for (n = 1; n < znode->child_cnt; n++) { + cmp = keys_cmp(c, &znode->zbranch[n - 1].key, + &znode->zbranch[n].key); + if (cmp > 0) { + err = 13; + goto out; + } + if (cmp == 0) { + /* This can only be keys with colliding hash */ + if (!is_hash_key(c, &znode->zbranch[n].key)) { + err = 14; + goto out; + } + + if (znode->level != 0 || c->replaying) + continue; + + /* + * Colliding keys should follow binary order of + * corresponding xentry/dentry names. + */ + err = dbg_check_key_order(c, &znode->zbranch[n - 1], + &znode->zbranch[n]); + if (err < 0) + return err; + if (err) { + err = 15; + goto out; + } + } + } + + for (n = 0; n < znode->child_cnt; n++) { + if (!znode->zbranch[n].znode && + (znode->zbranch[n].lnum == 0 || + znode->zbranch[n].len == 0)) { + err = 16; + goto out; + } + + if (znode->zbranch[n].lnum != 0 && + znode->zbranch[n].len == 0) { + err = 17; + goto out; + } + + if (znode->zbranch[n].lnum == 0 && + znode->zbranch[n].len != 0) { + err = 18; + goto out; + } + + if (znode->zbranch[n].lnum == 0 && + znode->zbranch[n].offs != 0) { + err = 19; + goto out; + } + + if (znode->level != 0 && znode->zbranch[n].znode) + if (znode->zbranch[n].znode->parent != znode) { + err = 20; + goto out; + } + } + + return 0; + +out: + ubifs_err("failed, error %d", err); + ubifs_msg("dump of the znode"); + ubifs_dump_znode(c, znode); + if (zp) { + ubifs_msg("dump of the parent znode"); + ubifs_dump_znode(c, zp); + } + dump_stack(); + return -EINVAL; +} +#else + +int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) +{ + return 0; +} + +void dbg_debugfs_exit_fs(struct ubifs_info *c) +{ + return; +} + int ubifs_debugging_init(struct ubifs_info *c) { - c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); - if (!c->dbg) + return 0; +} +void ubifs_debugging_exit(struct ubifs_info *c) +{ +} +int dbg_check_filesystem(struct ubifs_info *c) +{ + return 0; +} +int dbg_debugfs_init_fs(struct ubifs_info *c) +{ + return 0; +} +#endif + +#ifndef __UBOOT__ +/** + * dbg_check_tnc - check TNC tree. + * @c: UBIFS file-system description object + * @extra: do extra checks that are possible at start commit + * + * This function traverses whole TNC tree and checks every znode. Returns zero + * if everything is all right and %-EINVAL if something is wrong with TNC. + */ +int dbg_check_tnc(struct ubifs_info *c, int extra) +{ + struct ubifs_znode *znode; + long clean_cnt = 0, dirty_cnt = 0; + int err, last; + + if (!dbg_is_chk_index(c)) + return 0; + + ubifs_assert(mutex_is_locked(&c->tnc_mutex)); + if (!c->zroot.znode) + return 0; + + znode = ubifs_tnc_postorder_first(c->zroot.znode); + while (1) { + struct ubifs_znode *prev; + struct ubifs_zbranch *zbr; + + if (!znode->parent) + zbr = &c->zroot; + else + zbr = &znode->parent->zbranch[znode->iip]; + + err = dbg_check_znode(c, zbr); + if (err) + return err; + + if (extra) { + if (ubifs_zn_dirty(znode)) + dirty_cnt += 1; + else + clean_cnt += 1; + } + + prev = znode; + znode = ubifs_tnc_postorder_next(znode); + if (!znode) + break; + + /* + * If the last key of this znode is equivalent to the first key + * of the next znode (collision), then check order of the keys. + */ + last = prev->child_cnt - 1; + if (prev->level == 0 && znode->level == 0 && !c->replaying && + !keys_cmp(c, &prev->zbranch[last].key, + &znode->zbranch[0].key)) { + err = dbg_check_key_order(c, &prev->zbranch[last], + &znode->zbranch[0]); + if (err < 0) + return err; + if (err) { + ubifs_msg("first znode"); + ubifs_dump_znode(c, prev); + ubifs_msg("second znode"); + ubifs_dump_znode(c, znode); + return -EINVAL; + } + } + } + + if (extra) { + if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) { + ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld", + atomic_long_read(&c->clean_zn_cnt), + clean_cnt); + return -EINVAL; + } + if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) { + ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld", + atomic_long_read(&c->dirty_zn_cnt), + dirty_cnt); + return -EINVAL; + } + } + + return 0; +} +#else +int dbg_check_tnc(struct ubifs_info *c, int extra) +{ + return 0; +} +#endif + +/** + * dbg_walk_index - walk the on-flash index. + * @c: UBIFS file-system description object + * @leaf_cb: called for each leaf node + * @znode_cb: called for each indexing node + * @priv: private data which is passed to callbacks + * + * This function walks the UBIFS index and calls the @leaf_cb for each leaf + * node and @znode_cb for each indexing node. Returns zero in case of success + * and a negative error code in case of failure. + * + * It would be better if this function removed every znode it pulled to into + * the TNC, so that the behavior more closely matched the non-debugging + * behavior. + */ +int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, + dbg_znode_callback znode_cb, void *priv) +{ + int err; + struct ubifs_zbranch *zbr; + struct ubifs_znode *znode, *child; + + mutex_lock(&c->tnc_mutex); + /* If the root indexing node is not in TNC - pull it */ + if (!c->zroot.znode) { + c->zroot.znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(c->zroot.znode)) { + err = PTR_ERR(c->zroot.znode); + c->zroot.znode = NULL; + goto out_unlock; + } + } + + /* + * We are going to traverse the indexing tree in the postorder manner. + * Go down and find the leftmost indexing node where we are going to + * start from. + */ + znode = c->zroot.znode; + while (znode->level > 0) { + zbr = &znode->zbranch[0]; + child = zbr->znode; + if (!child) { + child = ubifs_load_znode(c, zbr, znode, 0); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto out_unlock; + } + zbr->znode = child; + } + + znode = child; + } + + /* Iterate over all indexing nodes */ + while (1) { + int idx; + + cond_resched(); + + if (znode_cb) { + err = znode_cb(c, znode, priv); + if (err) { + ubifs_err("znode checking function returned error %d", + err); + ubifs_dump_znode(c, znode); + goto out_dump; + } + } + if (leaf_cb && znode->level == 0) { + for (idx = 0; idx < znode->child_cnt; idx++) { + zbr = &znode->zbranch[idx]; + err = leaf_cb(c, zbr, priv); + if (err) { + ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d", + err, zbr->lnum, zbr->offs); + goto out_dump; + } + } + } + + if (!znode->parent) + break; + + idx = znode->iip + 1; + znode = znode->parent; + if (idx < znode->child_cnt) { + /* Switch to the next index in the parent */ + zbr = &znode->zbranch[idx]; + child = zbr->znode; + if (!child) { + child = ubifs_load_znode(c, zbr, znode, idx); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto out_unlock; + } + zbr->znode = child; + } + znode = child; + } else + /* + * This is the last child, switch to the parent and + * continue. + */ + continue; + + /* Go to the lowest leftmost znode in the new sub-tree */ + while (znode->level > 0) { + zbr = &znode->zbranch[0]; + child = zbr->znode; + if (!child) { + child = ubifs_load_znode(c, zbr, znode, 0); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto out_unlock; + } + zbr->znode = child; + } + znode = child; + } + } + + mutex_unlock(&c->tnc_mutex); + return 0; + +out_dump: + if (znode->parent) + zbr = &znode->parent->zbranch[znode->iip]; + else + zbr = &c->zroot; + ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); + ubifs_dump_znode(c, znode); +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * add_size - add znode size to partially calculated index size. + * @c: UBIFS file-system description object + * @znode: znode to add size for + * @priv: partially calculated index size + * + * This is a helper function for 'dbg_check_idx_size()' which is called for + * every indexing node and adds its size to the 'long long' variable pointed to + * by @priv. + */ +static int add_size(struct ubifs_info *c, struct ubifs_znode *znode, void *priv) +{ + long long *idx_size = priv; + int add; + + add = ubifs_idx_node_sz(c, znode->child_cnt); + add = ALIGN(add, 8); + *idx_size += add; + return 0; +} + +/** + * dbg_check_idx_size - check index size. + * @c: UBIFS file-system description object + * @idx_size: size to check + * + * This function walks the UBIFS index, calculates its size and checks that the + * size is equivalent to @idx_size. Returns zero in case of success and a + * negative error code in case of failure. + */ +int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) +{ + int err; + long long calc = 0; + + if (!dbg_is_chk_index(c)) + return 0; + + err = dbg_walk_index(c, NULL, add_size, &calc); + if (err) { + ubifs_err("error %d while walking the index", err); + return err; + } + + if (calc != idx_size) { + ubifs_err("index size check failed: calculated size is %lld, should be %lld", + calc, idx_size); + dump_stack(); + return -EINVAL; + } + + return 0; +} + +#ifndef __UBOOT__ +/** + * struct fsck_inode - information about an inode used when checking the file-system. + * @rb: link in the RB-tree of inodes + * @inum: inode number + * @mode: inode type, permissions, etc + * @nlink: inode link count + * @xattr_cnt: count of extended attributes + * @references: how many directory/xattr entries refer this inode (calculated + * while walking the index) + * @calc_cnt: for directory inode count of child directories + * @size: inode size (read from on-flash inode) + * @xattr_sz: summary size of all extended attributes (read from on-flash + * inode) + * @calc_sz: for directories calculated directory size + * @calc_xcnt: count of extended attributes + * @calc_xsz: calculated summary size of all extended attributes + * @xattr_nms: sum of lengths of all extended attribute names belonging to this + * inode (read from on-flash inode) + * @calc_xnms: calculated sum of lengths of all extended attribute names + */ +struct fsck_inode { + struct rb_node rb; + ino_t inum; + umode_t mode; + unsigned int nlink; + unsigned int xattr_cnt; + int references; + int calc_cnt; + long long size; + unsigned int xattr_sz; + long long calc_sz; + long long calc_xcnt; + long long calc_xsz; + unsigned int xattr_nms; + long long calc_xnms; +}; + +/** + * struct fsck_data - private FS checking information. + * @inodes: RB-tree of all inodes (contains @struct fsck_inode objects) + */ +struct fsck_data { + struct rb_root inodes; +}; + +/** + * add_inode - add inode information to RB-tree of inodes. + * @c: UBIFS file-system description object + * @fsckd: FS checking information + * @ino: raw UBIFS inode to add + * + * This is a helper function for 'check_leaf()' which adds information about + * inode @ino to the RB-tree of inodes. Returns inode information pointer in + * case of success and a negative error code in case of failure. + */ +static struct fsck_inode *add_inode(struct ubifs_info *c, + struct fsck_data *fsckd, + struct ubifs_ino_node *ino) +{ + struct rb_node **p, *parent = NULL; + struct fsck_inode *fscki; + ino_t inum = key_inum_flash(c, &ino->key); + struct inode *inode; + struct ubifs_inode *ui; + + p = &fsckd->inodes.rb_node; + while (*p) { + parent = *p; + fscki = rb_entry(parent, struct fsck_inode, rb); + if (inum < fscki->inum) + p = &(*p)->rb_left; + else if (inum > fscki->inum) + p = &(*p)->rb_right; + else + return fscki; + } + + if (inum > c->highest_inum) { + ubifs_err("too high inode number, max. is %lu", + (unsigned long)c->highest_inum); + return ERR_PTR(-EINVAL); + } + + fscki = kzalloc(sizeof(struct fsck_inode), GFP_NOFS); + if (!fscki) + return ERR_PTR(-ENOMEM); + + inode = ilookup(c->vfs_sb, inum); + + fscki->inum = inum; + /* + * If the inode is present in the VFS inode cache, use it instead of + * the on-flash inode which might be out-of-date. E.g., the size might + * be out-of-date. If we do not do this, the following may happen, for + * example: + * 1. A power cut happens + * 2. We mount the file-system R/O, the replay process fixes up the + * inode size in the VFS cache, but on on-flash. + * 3. 'check_leaf()' fails because it hits a data node beyond inode + * size. + */ + if (!inode) { + fscki->nlink = le32_to_cpu(ino->nlink); + fscki->size = le64_to_cpu(ino->size); + fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); + fscki->xattr_sz = le32_to_cpu(ino->xattr_size); + fscki->xattr_nms = le32_to_cpu(ino->xattr_names); + fscki->mode = le32_to_cpu(ino->mode); + } else { + ui = ubifs_inode(inode); + fscki->nlink = inode->i_nlink; + fscki->size = inode->i_size; + fscki->xattr_cnt = ui->xattr_cnt; + fscki->xattr_sz = ui->xattr_size; + fscki->xattr_nms = ui->xattr_names; + fscki->mode = inode->i_mode; + iput(inode); + } + + if (S_ISDIR(fscki->mode)) { + fscki->calc_sz = UBIFS_INO_NODE_SZ; + fscki->calc_cnt = 2; + } + + rb_link_node(&fscki->rb, parent, p); + rb_insert_color(&fscki->rb, &fsckd->inodes); + + return fscki; +} + +/** + * search_inode - search inode in the RB-tree of inodes. + * @fsckd: FS checking information + * @inum: inode number to search + * + * This is a helper function for 'check_leaf()' which searches inode @inum in + * the RB-tree of inodes and returns an inode information pointer or %NULL if + * the inode was not found. + */ +static struct fsck_inode *search_inode(struct fsck_data *fsckd, ino_t inum) +{ + struct rb_node *p; + struct fsck_inode *fscki; + + p = fsckd->inodes.rb_node; + while (p) { + fscki = rb_entry(p, struct fsck_inode, rb); + if (inum < fscki->inum) + p = p->rb_left; + else if (inum > fscki->inum) + p = p->rb_right; + else + return fscki; + } + return NULL; +} + +/** + * read_add_inode - read inode node and add it to RB-tree of inodes. + * @c: UBIFS file-system description object + * @fsckd: FS checking information + * @inum: inode number to read + * + * This is a helper function for 'check_leaf()' which finds inode node @inum in + * the index, reads it, and adds it to the RB-tree of inodes. Returns inode + * information pointer in case of success and a negative error code in case of + * failure. + */ +static struct fsck_inode *read_add_inode(struct ubifs_info *c, + struct fsck_data *fsckd, ino_t inum) +{ + int n, err; + union ubifs_key key; + struct ubifs_znode *znode; + struct ubifs_zbranch *zbr; + struct ubifs_ino_node *ino; + struct fsck_inode *fscki; + + fscki = search_inode(fsckd, inum); + if (fscki) + return fscki; + + ino_key_init(c, &key, inum); + err = ubifs_lookup_level0(c, &key, &znode, &n); + if (!err) { + ubifs_err("inode %lu not found in index", (unsigned long)inum); + return ERR_PTR(-ENOENT); + } else if (err < 0) { + ubifs_err("error %d while looking up inode %lu", + err, (unsigned long)inum); + return ERR_PTR(err); + } + + zbr = &znode->zbranch[n]; + if (zbr->len < UBIFS_INO_NODE_SZ) { + ubifs_err("bad node %lu node length %d", + (unsigned long)inum, zbr->len); + return ERR_PTR(-EINVAL); + } + + ino = kmalloc(zbr->len, GFP_NOFS); + if (!ino) + return ERR_PTR(-ENOMEM); + + err = ubifs_tnc_read_node(c, zbr, ino); + if (err) { + ubifs_err("cannot read inode node at LEB %d:%d, error %d", + zbr->lnum, zbr->offs, err); + kfree(ino); + return ERR_PTR(err); + } + + fscki = add_inode(c, fsckd, ino); + kfree(ino); + if (IS_ERR(fscki)) { + ubifs_err("error %ld while adding inode %lu node", + PTR_ERR(fscki), (unsigned long)inum); + return fscki; + } + + return fscki; +} + +/** + * check_leaf - check leaf node. + * @c: UBIFS file-system description object + * @zbr: zbranch of the leaf node to check + * @priv: FS checking information + * + * This is a helper function for 'dbg_check_filesystem()' which is called for + * every single leaf node while walking the indexing tree. It checks that the + * leaf node referred from the indexing tree exists, has correct CRC, and does + * some other basic validation. This function is also responsible for building + * an RB-tree of inodes - it adds all inodes into the RB-tree. It also + * calculates reference count, size, etc for each inode in order to later + * compare them to the information stored inside the inodes and detect possible + * inconsistencies. Returns zero in case of success and a negative error code + * in case of failure. + */ +static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *priv) +{ + ino_t inum; + void *node; + struct ubifs_ch *ch; + int err, type = key_type(c, &zbr->key); + struct fsck_inode *fscki; + + if (zbr->len < UBIFS_CH_SZ) { + ubifs_err("bad leaf length %d (LEB %d:%d)", + zbr->len, zbr->lnum, zbr->offs); + return -EINVAL; + } + + node = kmalloc(zbr->len, GFP_NOFS); + if (!node) return -ENOMEM; - c->dbg->buf = vmalloc(c->leb_size); - if (!c->dbg->buf) + err = ubifs_tnc_read_node(c, zbr, node); + if (err) { + ubifs_err("cannot read leaf node at LEB %d:%d, error %d", + zbr->lnum, zbr->offs, err); + goto out_free; + } + + /* If this is an inode node, add it to RB-tree of inodes */ + if (type == UBIFS_INO_KEY) { + fscki = add_inode(c, priv, node); + if (IS_ERR(fscki)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while adding inode node", err); + goto out_dump; + } goto out; + } + + if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY && + type != UBIFS_DATA_KEY) { + ubifs_err("unexpected node type %d at LEB %d:%d", + type, zbr->lnum, zbr->offs); + err = -EINVAL; + goto out_free; + } + + ch = node; + if (le64_to_cpu(ch->sqnum) > c->max_sqnum) { + ubifs_err("too high sequence number, max. is %llu", + c->max_sqnum); + err = -EINVAL; + goto out_dump; + } + + if (type == UBIFS_DATA_KEY) { + long long blk_offs; + struct ubifs_data_node *dn = node; + + /* + * Search the inode node this data node belongs to and insert + * it to the RB-tree of inodes. + */ + inum = key_inum_flash(c, &dn->key); + fscki = read_add_inode(c, priv, inum); + if (IS_ERR(fscki)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while processing data node and trying to find inode node %lu", + err, (unsigned long)inum); + goto out_dump; + } + + /* Make sure the data node is within inode size */ + blk_offs = key_block_flash(c, &dn->key); + blk_offs <<= UBIFS_BLOCK_SHIFT; + blk_offs += le32_to_cpu(dn->size); + if (blk_offs > fscki->size) { + ubifs_err("data node at LEB %d:%d is not within inode size %lld", + zbr->lnum, zbr->offs, fscki->size); + err = -EINVAL; + goto out_dump; + } + } else { + int nlen; + struct ubifs_dent_node *dent = node; + struct fsck_inode *fscki1; + + err = ubifs_validate_entry(c, dent); + if (err) + goto out_dump; + + /* + * Search the inode node this entry refers to and the parent + * inode node and insert them to the RB-tree of inodes. + */ + inum = le64_to_cpu(dent->inum); + fscki = read_add_inode(c, priv, inum); + if (IS_ERR(fscki)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while processing entry node and trying to find inode node %lu", + err, (unsigned long)inum); + goto out_dump; + } + + /* Count how many direntries or xentries refers this inode */ + fscki->references += 1; + + inum = key_inum_flash(c, &dent->key); + fscki1 = read_add_inode(c, priv, inum); + if (IS_ERR(fscki1)) { + err = PTR_ERR(fscki1); + ubifs_err("error %d while processing entry node and trying to find parent inode node %lu", + err, (unsigned long)inum); + goto out_dump; + } + + nlen = le16_to_cpu(dent->nlen); + if (type == UBIFS_XENT_KEY) { + fscki1->calc_xcnt += 1; + fscki1->calc_xsz += CALC_DENT_SIZE(nlen); + fscki1->calc_xsz += CALC_XATTR_BYTES(fscki->size); + fscki1->calc_xnms += nlen; + } else { + fscki1->calc_sz += CALC_DENT_SIZE(nlen); + if (dent->type == UBIFS_ITYPE_DIR) + fscki1->calc_cnt += 1; + } + } + +out: + kfree(node); + return 0; + +out_dump: + ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs); + ubifs_dump_node(c, node); +out_free: + kfree(node); + return err; +} + +/** + * free_inodes - free RB-tree of inodes. + * @fsckd: FS checking information + */ +static void free_inodes(struct fsck_data *fsckd) +{ + struct fsck_inode *fscki, *n; + + rbtree_postorder_for_each_entry_safe(fscki, n, &fsckd->inodes, rb) + kfree(fscki); +} + +/** + * check_inodes - checks all inodes. + * @c: UBIFS file-system description object + * @fsckd: FS checking information + * + * This is a helper function for 'dbg_check_filesystem()' which walks the + * RB-tree of inodes after the index scan has been finished, and checks that + * inode nlink, size, etc are correct. Returns zero if inodes are fine, + * %-EINVAL if not, and a negative error code in case of failure. + */ +static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) +{ + int n, err; + union ubifs_key key; + struct ubifs_znode *znode; + struct ubifs_zbranch *zbr; + struct ubifs_ino_node *ino; + struct fsck_inode *fscki; + struct rb_node *this = rb_first(&fsckd->inodes); + + while (this) { + fscki = rb_entry(this, struct fsck_inode, rb); + this = rb_next(this); + + if (S_ISDIR(fscki->mode)) { + /* + * Directories have to have exactly one reference (they + * cannot have hardlinks), although root inode is an + * exception. + */ + if (fscki->inum != UBIFS_ROOT_INO && + fscki->references != 1) { + ubifs_err("directory inode %lu has %d direntries which refer it, but should be 1", + (unsigned long)fscki->inum, + fscki->references); + goto out_dump; + } + if (fscki->inum == UBIFS_ROOT_INO && + fscki->references != 0) { + ubifs_err("root inode %lu has non-zero (%d) direntries which refer it", + (unsigned long)fscki->inum, + fscki->references); + goto out_dump; + } + if (fscki->calc_sz != fscki->size) { + ubifs_err("directory inode %lu size is %lld, but calculated size is %lld", + (unsigned long)fscki->inum, + fscki->size, fscki->calc_sz); + goto out_dump; + } + if (fscki->calc_cnt != fscki->nlink) { + ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d", + (unsigned long)fscki->inum, + fscki->nlink, fscki->calc_cnt); + goto out_dump; + } + } else { + if (fscki->references != fscki->nlink) { + ubifs_err("inode %lu nlink is %d, but calculated nlink is %d", + (unsigned long)fscki->inum, + fscki->nlink, fscki->references); + goto out_dump; + } + } + if (fscki->xattr_sz != fscki->calc_xsz) { + ubifs_err("inode %lu has xattr size %u, but calculated size is %lld", + (unsigned long)fscki->inum, fscki->xattr_sz, + fscki->calc_xsz); + goto out_dump; + } + if (fscki->xattr_cnt != fscki->calc_xcnt) { + ubifs_err("inode %lu has %u xattrs, but calculated count is %lld", + (unsigned long)fscki->inum, + fscki->xattr_cnt, fscki->calc_xcnt); + goto out_dump; + } + if (fscki->xattr_nms != fscki->calc_xnms) { + ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld", + (unsigned long)fscki->inum, fscki->xattr_nms, + fscki->calc_xnms); + goto out_dump; + } + } + + return 0; + +out_dump: + /* Read the bad inode and dump it */ + ino_key_init(c, &key, fscki->inum); + err = ubifs_lookup_level0(c, &key, &znode, &n); + if (!err) { + ubifs_err("inode %lu not found in index", + (unsigned long)fscki->inum); + return -ENOENT; + } else if (err < 0) { + ubifs_err("error %d while looking up inode %lu", + err, (unsigned long)fscki->inum); + return err; + } + + zbr = &znode->zbranch[n]; + ino = kmalloc(zbr->len, GFP_NOFS); + if (!ino) + return -ENOMEM; + + err = ubifs_tnc_read_node(c, zbr, ino); + if (err) { + ubifs_err("cannot read inode node at LEB %d:%d, error %d", + zbr->lnum, zbr->offs, err); + kfree(ino); + return err; + } + + ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", + (unsigned long)fscki->inum, zbr->lnum, zbr->offs); + ubifs_dump_node(c, ino); + kfree(ino); + return -EINVAL; +} + +/** + * dbg_check_filesystem - check the file-system. + * @c: UBIFS file-system description object + * + * This function checks the file system, namely: + * o makes sure that all leaf nodes exist and their CRCs are correct; + * o makes sure inode nlink, size, xattr size/count are correct (for all + * inodes). + * + * The function reads whole indexing tree and all nodes, so it is pretty + * heavy-weight. Returns zero if the file-system is consistent, %-EINVAL if + * not, and a negative error code in case of failure. + */ +int dbg_check_filesystem(struct ubifs_info *c) +{ + int err; + struct fsck_data fsckd; + + if (!dbg_is_chk_fs(c)) + return 0; + + fsckd.inodes = RB_ROOT; + err = dbg_walk_index(c, check_leaf, NULL, &fsckd); + if (err) + goto out_free; + + err = check_inodes(c, &fsckd); + if (err) + goto out_free; + + free_inodes(&fsckd); + return 0; + +out_free: + ubifs_err("file-system check failed with error %d", err); + dump_stack(); + free_inodes(&fsckd); + return err; +} + +/** + * dbg_check_data_nodes_order - check that list of data nodes is sorted. + * @c: UBIFS file-system description object + * @head: the list of nodes ('struct ubifs_scan_node' objects) + * + * This function returns zero if the list of data nodes is sorted correctly, + * and %-EINVAL if not. + */ +int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) +{ + struct list_head *cur; + struct ubifs_scan_node *sa, *sb; + + if (!dbg_is_chk_gen(c)) + return 0; + + for (cur = head->next; cur->next != head; cur = cur->next) { + ino_t inuma, inumb; + uint32_t blka, blkb; + + cond_resched(); + sa = container_of(cur, struct ubifs_scan_node, list); + sb = container_of(cur->next, struct ubifs_scan_node, list); + + if (sa->type != UBIFS_DATA_NODE) { + ubifs_err("bad node type %d", sa->type); + ubifs_dump_node(c, sa->node); + return -EINVAL; + } + if (sb->type != UBIFS_DATA_NODE) { + ubifs_err("bad node type %d", sb->type); + ubifs_dump_node(c, sb->node); + return -EINVAL; + } + + inuma = key_inum(c, &sa->key); + inumb = key_inum(c, &sb->key); + + if (inuma < inumb) + continue; + if (inuma > inumb) { + ubifs_err("larger inum %lu goes before inum %lu", + (unsigned long)inuma, (unsigned long)inumb); + goto error_dump; + } + + blka = key_block(c, &sa->key); + blkb = key_block(c, &sb->key); + + if (blka > blkb) { + ubifs_err("larger block %u goes before %u", blka, blkb); + goto error_dump; + } + if (blka == blkb) { + ubifs_err("two data nodes for the same block"); + goto error_dump; + } + } return 0; +error_dump: + ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sb->node); + return -EINVAL; +} + +/** + * dbg_check_nondata_nodes_order - check that list of data nodes is sorted. + * @c: UBIFS file-system description object + * @head: the list of nodes ('struct ubifs_scan_node' objects) + * + * This function returns zero if the list of non-data nodes is sorted correctly, + * and %-EINVAL if not. + */ +int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) +{ + struct list_head *cur; + struct ubifs_scan_node *sa, *sb; + + if (!dbg_is_chk_gen(c)) + return 0; + + for (cur = head->next; cur->next != head; cur = cur->next) { + ino_t inuma, inumb; + uint32_t hasha, hashb; + + cond_resched(); + sa = container_of(cur, struct ubifs_scan_node, list); + sb = container_of(cur->next, struct ubifs_scan_node, list); + + if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && + sa->type != UBIFS_XENT_NODE) { + ubifs_err("bad node type %d", sa->type); + ubifs_dump_node(c, sa->node); + return -EINVAL; + } + if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && + sa->type != UBIFS_XENT_NODE) { + ubifs_err("bad node type %d", sb->type); + ubifs_dump_node(c, sb->node); + return -EINVAL; + } + + if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) { + ubifs_err("non-inode node goes before inode node"); + goto error_dump; + } + + if (sa->type == UBIFS_INO_NODE && sb->type != UBIFS_INO_NODE) + continue; + + if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) { + /* Inode nodes are sorted in descending size order */ + if (sa->len < sb->len) { + ubifs_err("smaller inode node goes first"); + goto error_dump; + } + continue; + } + + /* + * This is either a dentry or xentry, which should be sorted in + * ascending (parent ino, hash) order. + */ + inuma = key_inum(c, &sa->key); + inumb = key_inum(c, &sb->key); + + if (inuma < inumb) + continue; + if (inuma > inumb) { + ubifs_err("larger inum %lu goes before inum %lu", + (unsigned long)inuma, (unsigned long)inumb); + goto error_dump; + } + + hasha = key_block(c, &sa->key); + hashb = key_block(c, &sb->key); + + if (hasha > hashb) { + ubifs_err("larger hash %u goes before %u", + hasha, hashb); + goto error_dump; + } + } + + return 0; + +error_dump: + ubifs_msg("dumping first node"); + ubifs_dump_node(c, sa->node); + ubifs_msg("dumping second node"); + ubifs_dump_node(c, sb->node); + return -EINVAL; + return 0; +} + +static inline int chance(unsigned int n, unsigned int out_of) +{ + return !!((prandom_u32() % out_of) + 1 <= n); + +} + +static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) +{ + struct ubifs_debug_info *d = c->dbg; + + ubifs_assert(dbg_is_tst_rcvry(c)); + + if (!d->pc_cnt) { + /* First call - decide delay to the power cut */ + if (chance(1, 2)) { + unsigned long delay; + + if (chance(1, 2)) { + d->pc_delay = 1; + /* Fail withing 1 minute */ + delay = prandom_u32() % 60000; + d->pc_timeout = jiffies; + d->pc_timeout += msecs_to_jiffies(delay); + ubifs_warn("failing after %lums", delay); + } else { + d->pc_delay = 2; + delay = prandom_u32() % 10000; + /* Fail within 10000 operations */ + d->pc_cnt_max = delay; + ubifs_warn("failing after %lu calls", delay); + } + } + + d->pc_cnt += 1; + } + + /* Determine if failure delay has expired */ + if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout)) + return 0; + if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max) + return 0; + + if (lnum == UBIFS_SB_LNUM) { + if (write && chance(1, 2)) + return 0; + if (chance(19, 20)) + return 0; + ubifs_warn("failing in super block LEB %d", lnum); + } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { + if (chance(19, 20)) + return 0; + ubifs_warn("failing in master LEB %d", lnum); + } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { + if (write && chance(99, 100)) + return 0; + if (chance(399, 400)) + return 0; + ubifs_warn("failing in log LEB %d", lnum); + } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { + if (write && chance(7, 8)) + return 0; + if (chance(19, 20)) + return 0; + ubifs_warn("failing in LPT LEB %d", lnum); + } else if (lnum >= c->orph_first && lnum <= c->orph_last) { + if (write && chance(1, 2)) + return 0; + if (chance(9, 10)) + return 0; + ubifs_warn("failing in orphan LEB %d", lnum); + } else if (lnum == c->ihead_lnum) { + if (chance(99, 100)) + return 0; + ubifs_warn("failing in index head LEB %d", lnum); + } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { + if (chance(9, 10)) + return 0; + ubifs_warn("failing in GC head LEB %d", lnum); + } else if (write && !RB_EMPTY_ROOT(&c->buds) && + !ubifs_search_bud(c, lnum)) { + if (chance(19, 20)) + return 0; + ubifs_warn("failing in non-bud LEB %d", lnum); + } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || + c->cmt_state == COMMIT_RUNNING_REQUIRED) { + if (chance(999, 1000)) + return 0; + ubifs_warn("failing in bud LEB %d commit running", lnum); + } else { + if (chance(9999, 10000)) + return 0; + ubifs_warn("failing in bud LEB %d commit not running", lnum); + } + + d->pc_happened = 1; + ubifs_warn("========== Power cut emulated =========="); + dump_stack(); + return 1; +} + +static int corrupt_data(const struct ubifs_info *c, const void *buf, + unsigned int len) +{ + unsigned int from, to, ffs = chance(1, 2); + unsigned char *p = (void *)buf; + + from = prandom_u32() % len; + /* Corruption span max to end of write unit */ + to = min(len, ALIGN(from + 1, c->max_write_size)); + + ubifs_warn("filled bytes %u-%u with %s", from, to - 1, + ffs ? "0xFFs" : "random data"); + + if (ffs) + memset(p + from, 0xFF, to - from); + else + prandom_bytes(p + from, to - from); + + return to; +} + +int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, + int offs, int len) +{ + int err, failing; + + if (c->dbg->pc_happened) + return -EROFS; + + failing = power_cut_emulated(c, lnum, 1); + if (failing) { + len = corrupt_data(c, buf, len); + ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)", + len, lnum, offs); + } + err = ubi_leb_write(c->ubi, lnum, buf, offs, len); + if (err) + return err; + if (failing) + return -EROFS; + return 0; +} + +int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, + int len) +{ + int err; + + if (c->dbg->pc_happened) + return -EROFS; + if (power_cut_emulated(c, lnum, 1)) + return -EROFS; + err = ubi_leb_change(c->ubi, lnum, buf, len); + if (err) + return err; + if (power_cut_emulated(c, lnum, 1)) + return -EROFS; + return 0; +} + +int dbg_leb_unmap(struct ubifs_info *c, int lnum) +{ + int err; + + if (c->dbg->pc_happened) + return -EROFS; + if (power_cut_emulated(c, lnum, 0)) + return -EROFS; + err = ubi_leb_unmap(c->ubi, lnum); + if (err) + return err; + if (power_cut_emulated(c, lnum, 0)) + return -EROFS; + return 0; +} + +int dbg_leb_map(struct ubifs_info *c, int lnum) +{ + int err; + + if (c->dbg->pc_happened) + return -EROFS; + if (power_cut_emulated(c, lnum, 0)) + return -EROFS; + err = ubi_leb_map(c->ubi, lnum); + if (err) + return err; + if (power_cut_emulated(c, lnum, 0)) + return -EROFS; + return 0; +} + +/* + * Root directory for UBIFS stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular file-system mounts. + */ +static struct dentry *dfs_rootdir; + +static int dfs_file_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return nonseekable_open(inode, file); +} + +/** + * provide_user_output - provide output to the user reading a debugfs file. + * @val: boolean value for the answer + * @u: the buffer to store the answer at + * @count: size of the buffer + * @ppos: position in the @u output buffer + * + * This is a simple helper function which stores @val boolean value in the user + * buffer when the user reads one of UBIFS debugfs files. Returns amount of + * bytes written to @u in case of success and a negative error code in case of + * failure. + */ +static int provide_user_output(int val, char __user *u, size_t count, + loff_t *ppos) +{ + char buf[3]; + + if (val) + buf[0] = '1'; + else + buf[0] = '0'; + buf[1] = '\n'; + buf[2] = 0x00; + + return simple_read_from_buffer(u, count, ppos, buf, 2); +} + +static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count, + loff_t *ppos) +{ + struct dentry *dent = file->f_path.dentry; + struct ubifs_info *c = file->private_data; + struct ubifs_debug_info *d = c->dbg; + int val; + + if (dent == d->dfs_chk_gen) + val = d->chk_gen; + else if (dent == d->dfs_chk_index) + val = d->chk_index; + else if (dent == d->dfs_chk_orph) + val = d->chk_orph; + else if (dent == d->dfs_chk_lprops) + val = d->chk_lprops; + else if (dent == d->dfs_chk_fs) + val = d->chk_fs; + else if (dent == d->dfs_tst_rcvry) + val = d->tst_rcvry; + else if (dent == d->dfs_ro_error) + val = c->ro_error; + else + return -EINVAL; + + return provide_user_output(val, u, count, ppos); +} + +/** + * interpret_user_input - interpret user debugfs file input. + * @u: user-provided buffer with the input + * @count: buffer size + * + * This is a helper function which interpret user input to a boolean UBIFS + * debugfs file. Returns %0 or %1 in case of success and a negative error code + * in case of failure. + */ +static int interpret_user_input(const char __user *u, size_t count) +{ + size_t buf_size; + char buf[8]; + + buf_size = min_t(size_t, count, (sizeof(buf) - 1)); + if (copy_from_user(buf, u, buf_size)) + return -EFAULT; + + if (buf[0] == '1') + return 1; + else if (buf[0] == '0') + return 0; + + return -EINVAL; +} + +static ssize_t dfs_file_write(struct file *file, const char __user *u, + size_t count, loff_t *ppos) +{ + struct ubifs_info *c = file->private_data; + struct ubifs_debug_info *d = c->dbg; + struct dentry *dent = file->f_path.dentry; + int val; + + /* + * TODO: this is racy - the file-system might have already been + * unmounted and we'd oops in this case. The plan is to fix it with + * help of 'iterate_supers_type()' which we should have in v3.0: when + * a debugfs opened, we rember FS's UUID in file->private_data. Then + * whenever we access the FS via a debugfs file, we iterate all UBIFS + * superblocks and fine the one with the same UUID, and take the + * locking right. + * + * The other way to go suggested by Al Viro is to create a separate + * 'ubifs-debug' file-system instead. + */ + if (file->f_path.dentry == d->dfs_dump_lprops) { + ubifs_dump_lprops(c); + return count; + } + if (file->f_path.dentry == d->dfs_dump_budg) { + ubifs_dump_budg(c, &c->bi); + return count; + } + if (file->f_path.dentry == d->dfs_dump_tnc) { + mutex_lock(&c->tnc_mutex); + ubifs_dump_tnc(c); + mutex_unlock(&c->tnc_mutex); + return count; + } + + val = interpret_user_input(u, count); + if (val < 0) + return val; + + if (dent == d->dfs_chk_gen) + d->chk_gen = val; + else if (dent == d->dfs_chk_index) + d->chk_index = val; + else if (dent == d->dfs_chk_orph) + d->chk_orph = val; + else if (dent == d->dfs_chk_lprops) + d->chk_lprops = val; + else if (dent == d->dfs_chk_fs) + d->chk_fs = val; + else if (dent == d->dfs_tst_rcvry) + d->tst_rcvry = val; + else if (dent == d->dfs_ro_error) + c->ro_error = !!val; + else + return -EINVAL; + + return count; +} + +static const struct file_operations dfs_fops = { + .open = dfs_file_open, + .read = dfs_file_read, + .write = dfs_file_write, + .owner = THIS_MODULE, + .llseek = no_llseek, +}; + +/** + * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance. + * @c: UBIFS file-system description object + * + * This function creates all debugfs files for this instance of UBIFS. Returns + * zero in case of success and a negative error code in case of failure. + * + * Note, the only reason we have not merged this function with the + * 'ubifs_debugging_init()' function is because it is better to initialize + * debugfs interfaces at the very end of the mount process, and remove them at + * the very beginning of the mount process. + */ +int dbg_debugfs_init_fs(struct ubifs_info *c) +{ + int err, n; + const char *fname; + struct dentry *dent; + struct ubifs_debug_info *d = c->dbg; + + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + c->vi.ubi_num, c->vi.vol_id); + if (n == UBIFS_DFS_DIR_LEN) { + /* The array size is too small */ + fname = UBIFS_DFS_DIR_NAME; + dent = ERR_PTR(-EINVAL); + goto out; + } + + fname = d->dfs_dir_name; + dent = debugfs_create_dir(fname, dfs_rootdir); + if (IS_ERR_OR_NULL(dent)) + goto out; + d->dfs_dir = dent; + + fname = "dump_lprops"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_dump_lprops = dent; + + fname = "dump_budg"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_dump_budg = dent; + + fname = "dump_tnc"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_dump_tnc = dent; + + fname = "chk_general"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_gen = dent; + + fname = "chk_index"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_index = dent; + + fname = "chk_orphans"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_orph = dent; + + fname = "chk_lprops"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_lprops = dent; + + fname = "chk_fs"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_fs = dent; + + fname = "tst_recovery"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_tst_rcvry = dent; + + fname = "ro_error"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_ro_error = dent; + + return 0; + +out_remove: + debugfs_remove_recursive(d->dfs_dir); out: - kfree(c->dbg); - return -ENOMEM; + err = dent ? PTR_ERR(dent) : -ENODEV; + ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", + fname, err); + return err; +} + +/** + * dbg_debugfs_exit_fs - remove all debugfs files. + * @c: UBIFS file-system description object + */ +void dbg_debugfs_exit_fs(struct ubifs_info *c) +{ + if (IS_ENABLED(CONFIG_DEBUG_FS)) + debugfs_remove_recursive(c->dbg->dfs_dir); +} + +struct ubifs_global_debug_info ubifs_dbg; + +static struct dentry *dfs_chk_gen; +static struct dentry *dfs_chk_index; +static struct dentry *dfs_chk_orph; +static struct dentry *dfs_chk_lprops; +static struct dentry *dfs_chk_fs; +static struct dentry *dfs_tst_rcvry; + +static ssize_t dfs_global_file_read(struct file *file, char __user *u, + size_t count, loff_t *ppos) +{ + struct dentry *dent = file->f_path.dentry; + int val; + + if (dent == dfs_chk_gen) + val = ubifs_dbg.chk_gen; + else if (dent == dfs_chk_index) + val = ubifs_dbg.chk_index; + else if (dent == dfs_chk_orph) + val = ubifs_dbg.chk_orph; + else if (dent == dfs_chk_lprops) + val = ubifs_dbg.chk_lprops; + else if (dent == dfs_chk_fs) + val = ubifs_dbg.chk_fs; + else if (dent == dfs_tst_rcvry) + val = ubifs_dbg.tst_rcvry; + else + return -EINVAL; + + return provide_user_output(val, u, count, ppos); +} + +static ssize_t dfs_global_file_write(struct file *file, const char __user *u, + size_t count, loff_t *ppos) +{ + struct dentry *dent = file->f_path.dentry; + int val; + + val = interpret_user_input(u, count); + if (val < 0) + return val; + + if (dent == dfs_chk_gen) + ubifs_dbg.chk_gen = val; + else if (dent == dfs_chk_index) + ubifs_dbg.chk_index = val; + else if (dent == dfs_chk_orph) + ubifs_dbg.chk_orph = val; + else if (dent == dfs_chk_lprops) + ubifs_dbg.chk_lprops = val; + else if (dent == dfs_chk_fs) + ubifs_dbg.chk_fs = val; + else if (dent == dfs_tst_rcvry) + ubifs_dbg.tst_rcvry = val; + else + return -EINVAL; + + return count; +} + +static const struct file_operations dfs_global_fops = { + .read = dfs_global_file_read, + .write = dfs_global_file_write, + .owner = THIS_MODULE, + .llseek = no_llseek, +}; + +/** + * dbg_debugfs_init - initialize debugfs file-system. + * + * UBIFS uses debugfs file-system to expose various debugging knobs to + * user-space. This function creates "ubifs" directory in the debugfs + * file-system. Returns zero in case of success and a negative error code in + * case of failure. + */ +int dbg_debugfs_init(void) +{ + int err; + const char *fname; + struct dentry *dent; + + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + fname = "ubifs"; + dent = debugfs_create_dir(fname, NULL); + if (IS_ERR_OR_NULL(dent)) + goto out; + dfs_rootdir = dent; + + fname = "chk_general"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_gen = dent; + + fname = "chk_index"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_index = dent; + + fname = "chk_orphans"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_orph = dent; + + fname = "chk_lprops"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_lprops = dent; + + fname = "chk_fs"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_fs = dent; + + fname = "tst_recovery"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_tst_rcvry = dent; + + return 0; + +out_remove: + debugfs_remove_recursive(dfs_rootdir); +out: + err = dent ? PTR_ERR(dent) : -ENODEV; + ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", + fname, err); + return err; +} + +/** + * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. + */ +void dbg_debugfs_exit(void) +{ + if (IS_ENABLED(CONFIG_DEBUG_FS)) + debugfs_remove_recursive(dfs_rootdir); +} + +/** + * ubifs_debugging_init - initialize UBIFS debugging. + * @c: UBIFS file-system description object + * + * This function initializes debugging-related data for the file system. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_debugging_init(struct ubifs_info *c) +{ + c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); + if (!c->dbg) + return -ENOMEM; + + return 0; } /** @@ -149,8 +3138,6 @@ out: */ void ubifs_debugging_exit(struct ubifs_info *c) { - vfree(c->dbg->buf); kfree(c->dbg); } - -#endif /* CONFIG_UBIFS_FS_DEBUG */ +#endif diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 62617b6..6d325af 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -23,19 +12,32 @@ #ifndef __UBIFS_DEBUG_H__ #define __UBIFS_DEBUG_H__ -#ifdef CONFIG_UBIFS_FS_DEBUG +#define __UBOOT__ +/* Checking helper functions */ +typedef int (*dbg_leaf_callback)(struct ubifs_info *c, + struct ubifs_zbranch *zbr, void *priv); +typedef int (*dbg_znode_callback)(struct ubifs_info *c, + struct ubifs_znode *znode, void *priv); + +/* + * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" + * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. + */ +#define UBIFS_DFS_DIR_NAME "ubi%d_%d" +#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) /** * ubifs_debug_info - per-FS debugging information. - * @buf: a buffer of LEB size, used for various purposes * @old_zroot: old index root - used by 'dbg_check_old_index()' * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' - * @failure_mode: failure mode for recovery testing - * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls - * @fail_timeout: time in jiffies when delay of failure mode expires - * @fail_cnt: current number of calls to failure mode I/O functions - * @fail_cnt_max: number of calls by which to delay failure mode + * + * @pc_happened: non-zero if an emulated power cut happened + * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @pc_timeout: time in jiffies when delay of failure mode expires + * @pc_cnt: current number of calls to failure mode I/O functions + * @pc_cnt_max: number of calls by which to delay failure mode + * * @chk_lpt_sz: used by LPT tree size checker * @chk_lpt_sz2: used by LPT tree size checker * @chk_lpt_wastage: used by LPT tree size checker @@ -45,24 +47,44 @@ * @new_ihead_offs: used by debugging to check @c->ihead_offs * * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') - * @saved_free: saved free space (used by 'dbg_save_space_info()') + * @saved_bi: saved budgeting information + * @saved_free: saved amount of free space + * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt + * + * @chk_gen: if general extra checks are enabled + * @chk_index: if index xtra checks are enabled + * @chk_orph: if orphans extra checks are enabled + * @chk_lprops: if lprops extra checks are enabled + * @chk_fs: if UBIFS contents extra checks are enabled + * @tst_rcvry: if UBIFS recovery testing mode enabled * - * dfs_dir_name: name of debugfs directory containing this file-system's files - * dfs_dir: direntry object of the file-system debugfs directory - * dfs_dump_lprops: "dump lprops" debugfs knob - * dfs_dump_budg: "dump budgeting information" debugfs knob - * dfs_dump_tnc: "dump TNC" debugfs knob + * @dfs_dir_name: name of debugfs directory containing this file-system's files + * @dfs_dir: direntry object of the file-system debugfs directory + * @dfs_dump_lprops: "dump lprops" debugfs knob + * @dfs_dump_budg: "dump budgeting information" debugfs knob + * @dfs_dump_tnc: "dump TNC" debugfs knob + * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks + * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks + * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks + * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks + * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks + * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing + * @dfs_ro_error: debugfs knob to switch UBIFS to R/O mode (different to + * re-mounting to R/O mode because it does not flush any buffers + * and UBIFS just starts returning -EROFS on all write + * operations) */ struct ubifs_debug_info { - void *buf; struct ubifs_zbranch old_zroot; int old_zroot_level; unsigned long long old_zroot_sqnum; - int failure_mode; - int fail_delay; - unsigned long fail_timeout; - unsigned int fail_cnt; - unsigned int fail_cnt_max; + + int pc_happened; + int pc_delay; + unsigned long pc_timeout; + unsigned int pc_cnt; + unsigned int pc_cnt_max; + long long chk_lpt_sz; long long chk_lpt_sz2; long long chk_lpt_wastage; @@ -72,321 +94,285 @@ struct ubifs_debug_info { int new_ihead_offs; struct ubifs_lp_stats saved_lst; + struct ubifs_budg_info saved_bi; long long saved_free; + int saved_idx_gc_cnt; + + unsigned int chk_gen:1; + unsigned int chk_index:1; + unsigned int chk_orph:1; + unsigned int chk_lprops:1; + unsigned int chk_fs:1; + unsigned int tst_rcvry:1; - char dfs_dir_name[100]; + char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; struct dentry *dfs_dir; struct dentry *dfs_dump_lprops; struct dentry *dfs_dump_budg; struct dentry *dfs_dump_tnc; + struct dentry *dfs_chk_gen; + struct dentry *dfs_chk_index; + struct dentry *dfs_chk_orph; + struct dentry *dfs_chk_lprops; + struct dentry *dfs_chk_fs; + struct dentry *dfs_tst_rcvry; + struct dentry *dfs_ro_error; }; -#define UBIFS_DBG(op) op +/** + * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information. + * + * @chk_gen: if general extra checks are enabled + * @chk_index: if index xtra checks are enabled + * @chk_orph: if orphans extra checks are enabled + * @chk_lprops: if lprops extra checks are enabled + * @chk_fs: if UBIFS contents extra checks are enabled + * @tst_rcvry: if UBIFS recovery testing mode enabled + */ +struct ubifs_global_debug_info { + unsigned int chk_gen:1; + unsigned int chk_index:1; + unsigned int chk_orph:1; + unsigned int chk_lprops:1; + unsigned int chk_fs:1; + unsigned int tst_rcvry:1; +}; +#ifndef __UBOOT__ #define ubifs_assert(expr) do { \ if (unlikely(!(expr))) { \ - printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ - __func__, __LINE__, 0); \ - dbg_dump_stack(); \ + pr_crit("UBIFS assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ + dump_stack(); \ } \ } while (0) #define ubifs_assert_cmt_locked(c) do { \ if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ up_write(&(c)->commit_sem); \ - printk(KERN_CRIT "commit lock is not locked!\n"); \ + pr_crit("commit lock is not locked!\n"); \ ubifs_assert(0); \ } \ } while (0) -#define dbg_dump_stack() do { \ - if (!dbg_failure_mode) \ +#define ubifs_dbg_msg(type, fmt, ...) \ + pr_debug("UBIFS DBG " type " (pid %d): " fmt "\n", current->pid, \ + ##__VA_ARGS__) + +#define DBG_KEY_BUF_LEN 48 +#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ + char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ + pr_debug("UBIFS DBG " type " (pid %d): " fmt "%s\n", current->pid, \ + ##__VA_ARGS__, \ + dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ +} while (0) +#else +#define ubifs_assert(expr) do { \ + if (unlikely(!(expr))) { \ + pr_crit("UBIFS assert failed in %s at %u\n", \ + __func__, __LINE__); \ dump_stack(); \ + } \ } while (0) -/* Generic debugging messages */ -#define dbg_msg(fmt, ...) do { \ - spin_lock(&dbg_lock); \ - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", 0, \ - __func__, ##__VA_ARGS__); \ - spin_unlock(&dbg_lock); \ +#define ubifs_assert_cmt_locked(c) do { \ + if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ + up_write(&(c)->commit_sem); \ + pr_crit("commit lock is not locked!\n"); \ + ubifs_assert(0); \ + } \ } while (0) -#define dbg_do_msg(typ, fmt, ...) do { \ - if (ubifs_msg_flags & typ) \ - dbg_msg(fmt, ##__VA_ARGS__); \ +#define ubifs_dbg_msg(type, fmt, ...) \ + pr_debug("UBIFS DBG " type ": " fmt "\n", \ + ##__VA_ARGS__) + +#define DBG_KEY_BUF_LEN 48 +#if defined CONFIG_MTD_DEBUG +#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ + char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ + pr_debug("UBIFS DBG " type ": " fmt "%s\n", \ + ##__VA_ARGS__, \ + dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ } while (0) - -#define dbg_err(fmt, ...) do { \ - spin_lock(&dbg_lock); \ - ubifs_err(fmt, ##__VA_ARGS__); \ - spin_unlock(&dbg_lock); \ +#else +#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ + pr_debug("UBIFS DBG\n"); \ } while (0) -const char *dbg_key_str0(const struct ubifs_info *c, - const union ubifs_key *key); -const char *dbg_key_str1(const struct ubifs_info *c, - const union ubifs_key *key); +#endif -/* - * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message - * macros. - */ -#define DBGKEY(key) dbg_key_str0(c, (key)) -#define DBGKEY1(key) dbg_key_str1(c, (key)) +#endif /* General messages */ -#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) - +#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) /* Additional journal messages */ -#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) - +#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) +#define dbg_jnlk(key, fmt, ...) \ + ubifs_dbg_msg_key("jnl", key, fmt, ##__VA_ARGS__) /* Additional TNC messages */ -#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) - +#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) +#define dbg_tnck(key, fmt, ...) \ + ubifs_dbg_msg_key("tnc", key, fmt, ##__VA_ARGS__) /* Additional lprops messages */ -#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) - +#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) /* Additional LEB find messages */ -#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) - +#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) /* Additional mount messages */ -#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) - +#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) +#define dbg_mntk(key, fmt, ...) \ + ubifs_dbg_msg_key("mnt", key, fmt, ##__VA_ARGS__) /* Additional I/O messages */ -#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) - +#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) /* Additional commit messages */ -#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) - +#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__) /* Additional budgeting messages */ -#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) - +#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__) /* Additional log messages */ -#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) - +#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__) /* Additional gc messages */ -#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) - +#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__) /* Additional scan messages */ -#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) - +#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__) /* Additional recovery messages */ -#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) +#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) + +#ifndef __UBOOT__ +extern struct ubifs_global_debug_info ubifs_dbg; + +static inline int dbg_is_chk_gen(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen); +} +static inline int dbg_is_chk_index(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_index || c->dbg->chk_index); +} +static inline int dbg_is_chk_orph(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph); +} +static inline int dbg_is_chk_lprops(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops); +} +static inline int dbg_is_chk_fs(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs); +} +static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry); +} +static inline int dbg_is_power_cut(const struct ubifs_info *c) +{ + return !!c->dbg->pc_happened; +} -/* - * Debugging message type flags (must match msg_type_names in debug.c). - * - * UBIFS_MSG_GEN: general messages - * UBIFS_MSG_JNL: journal messages - * UBIFS_MSG_MNT: mount messages - * UBIFS_MSG_CMT: commit messages - * UBIFS_MSG_FIND: LEB find messages - * UBIFS_MSG_BUDG: budgeting messages - * UBIFS_MSG_GC: garbage collection messages - * UBIFS_MSG_TNC: TNC messages - * UBIFS_MSG_LP: lprops messages - * UBIFS_MSG_IO: I/O messages - * UBIFS_MSG_LOG: log messages - * UBIFS_MSG_SCAN: scan messages - * UBIFS_MSG_RCVRY: recovery messages - */ -enum { - UBIFS_MSG_GEN = 0x1, - UBIFS_MSG_JNL = 0x2, - UBIFS_MSG_MNT = 0x4, - UBIFS_MSG_CMT = 0x8, - UBIFS_MSG_FIND = 0x10, - UBIFS_MSG_BUDG = 0x20, - UBIFS_MSG_GC = 0x40, - UBIFS_MSG_TNC = 0x80, - UBIFS_MSG_LP = 0x100, - UBIFS_MSG_IO = 0x200, - UBIFS_MSG_LOG = 0x400, - UBIFS_MSG_SCAN = 0x800, - UBIFS_MSG_RCVRY = 0x1000, -}; - -/* Debugging message type flags for each default debug message level */ -#define UBIFS_MSG_LVL_0 0 -#define UBIFS_MSG_LVL_1 0x1 -#define UBIFS_MSG_LVL_2 0x7f -#define UBIFS_MSG_LVL_3 0xffff - -/* - * Debugging check flags (must match chk_names in debug.c). - * - * UBIFS_CHK_GEN: general checks - * UBIFS_CHK_TNC: check TNC - * UBIFS_CHK_IDX_SZ: check index size - * UBIFS_CHK_ORPH: check orphans - * UBIFS_CHK_OLD_IDX: check the old index - * UBIFS_CHK_LPROPS: check lprops - * UBIFS_CHK_FS: check the file-system - */ -enum { - UBIFS_CHK_GEN = 0x1, - UBIFS_CHK_TNC = 0x2, - UBIFS_CHK_IDX_SZ = 0x4, - UBIFS_CHK_ORPH = 0x8, - UBIFS_CHK_OLD_IDX = 0x10, - UBIFS_CHK_LPROPS = 0x20, - UBIFS_CHK_FS = 0x40, -}; - -/* - * Special testing flags (must match tst_names in debug.c). - * - * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method - * UBIFS_TST_RCVRY: failure mode for recovery testing - */ -enum { - UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, - UBIFS_TST_RCVRY = 0x4, -}; - -#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 -#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 -#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 -#else -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 -#endif - -#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS -#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff +int ubifs_debugging_init(struct ubifs_info *c); +void ubifs_debugging_exit(struct ubifs_info *c); #else -#define UBIFS_CHK_FLAGS_DEFAULT 0 -#endif - -#define dbg_ntype(type) "" -#define dbg_cstate(cmt_state) "" -#define dbg_get_key_dump(c, key) ({}) -#define dbg_dump_inode(c, inode) ({}) -#define dbg_dump_node(c, node) ({}) -#define dbg_dump_budget_req(req) ({}) -#define dbg_dump_lstats(lst) ({}) -#define dbg_dump_budg(c) ({}) -#define dbg_dump_lprop(c, lp) ({}) -#define dbg_dump_lprops(c) ({}) -#define dbg_dump_lpt_info(c) ({}) -#define dbg_dump_leb(c, lnum) ({}) -#define dbg_dump_znode(c, znode) ({}) -#define dbg_dump_heap(c, heap, cat) ({}) -#define dbg_dump_pnode(c, pnode, parent, iip) ({}) -#define dbg_dump_tnc(c) ({}) -#define dbg_dump_index(c) ({}) - -#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 -#define dbg_old_index_check_init(c, zroot) 0 -#define dbg_check_old_index(c, zroot) 0 -#define dbg_check_cats(c) 0 -#define dbg_check_ltab(c) 0 -#define dbg_chk_lpt_free_spc(c) 0 -#define dbg_chk_lpt_sz(c, action, len) 0 -#define dbg_check_synced_i_size(inode) 0 -#define dbg_check_dir_size(c, dir) 0 -#define dbg_check_tnc(c, x) 0 -#define dbg_check_idx_size(c, idx_size) 0 -#define dbg_check_filesystem(c) 0 -#define dbg_check_heap(c, heap, cat, add_pos) ({}) -#define dbg_check_lprops(c) 0 -#define dbg_check_lpt_nodes(c, cnode, row, col) 0 -#define dbg_force_in_the_gaps_enabled 0 -#define dbg_force_in_the_gaps() 0 -#define dbg_failure_mode 0 -#define dbg_failure_mode_registration(c) ({}) -#define dbg_failure_mode_deregistration(c) ({}) +static inline int dbg_is_chk_gen(const struct ubifs_info *c) +{ + return 0; +} +static inline int dbg_is_chk_index(const struct ubifs_info *c) +{ + return 0; +} +static inline int dbg_is_chk_orph(const struct ubifs_info *c) +{ + return 0; +} +static inline int dbg_is_chk_lprops(const struct ubifs_info *c) +{ + return 0; +} +static inline int dbg_is_chk_fs(const struct ubifs_info *c) +{ + return 0; +} +static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) +{ + return 0; +} +static inline int dbg_is_power_cut(const struct ubifs_info *c) +{ + return 0; +} int ubifs_debugging_init(struct ubifs_info *c); void ubifs_debugging_exit(struct ubifs_info *c); -#else /* !CONFIG_UBIFS_FS_DEBUG */ - -#define UBIFS_DBG(op) - -/* Use "if (0)" to make compiler check arguments even if debugging is off */ -#define ubifs_assert(expr) do { \ - if (0 && (expr)) \ - printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ - __func__, __LINE__, 0); \ -} while (0) - -#define dbg_err(fmt, ...) do { \ - if (0) \ - ubifs_err(fmt, ##__VA_ARGS__); \ -} while (0) - -#define dbg_msg(fmt, ...) do { \ - if (0) \ - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ - 0, __func__, ##__VA_ARGS__); \ -} while (0) - -#define dbg_dump_stack() -#define ubifs_assert_cmt_locked(c) - -#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) - -#define DBGKEY(key) ((char *)(key)) -#define DBGKEY1(key) ((char *)(key)) - -#define ubifs_debugging_init(c) 0 -#define ubifs_debugging_exit(c) ({}) - -#define dbg_ntype(type) "" -#define dbg_cstate(cmt_state) "" -#define dbg_get_key_dump(c, key) ({}) -#define dbg_dump_inode(c, inode) ({}) -#define dbg_dump_node(c, node) ({}) -#define dbg_dump_budget_req(req) ({}) -#define dbg_dump_lstats(lst) ({}) -#define dbg_dump_budg(c) ({}) -#define dbg_dump_lprop(c, lp) ({}) -#define dbg_dump_lprops(c) ({}) -#define dbg_dump_lpt_info(c) ({}) -#define dbg_dump_leb(c, lnum) ({}) -#define dbg_dump_znode(c, znode) ({}) -#define dbg_dump_heap(c, heap, cat) ({}) -#define dbg_dump_pnode(c, pnode, parent, iip) ({}) -#define dbg_dump_tnc(c) ({}) -#define dbg_dump_index(c) ({}) - -#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 -#define dbg_old_index_check_init(c, zroot) 0 -#define dbg_check_old_index(c, zroot) 0 -#define dbg_check_cats(c) 0 -#define dbg_check_ltab(c) 0 -#define dbg_chk_lpt_free_spc(c) 0 -#define dbg_chk_lpt_sz(c, action, len) 0 -#define dbg_check_synced_i_size(inode) 0 -#define dbg_check_dir_size(c, dir) 0 -#define dbg_check_tnc(c, x) 0 -#define dbg_check_idx_size(c, idx_size) 0 -#define dbg_check_filesystem(c) 0 -#define dbg_check_heap(c, heap, cat, add_pos) ({}) -#define dbg_check_lprops(c) 0 -#define dbg_check_lpt_nodes(c, cnode, row, col) 0 -#define dbg_force_in_the_gaps_enabled 0 -#define dbg_force_in_the_gaps() 0 -#define dbg_failure_mode 0 -#define dbg_failure_mode_registration(c) ({}) -#define dbg_failure_mode_deregistration(c) ({}) +#endif -#endif /* !CONFIG_UBIFS_FS_DEBUG */ +/* Dump functions */ +const char *dbg_ntype(int type); +const char *dbg_cstate(int cmt_state); +const char *dbg_jhead(int jhead); +const char *dbg_get_key_dump(const struct ubifs_info *c, + const union ubifs_key *key); +const char *dbg_snprintf_key(const struct ubifs_info *c, + const union ubifs_key *key, char *buffer, int len); +void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode); +void ubifs_dump_node(const struct ubifs_info *c, const void *node); +void ubifs_dump_budget_req(const struct ubifs_budget_req *req); +void ubifs_dump_lstats(const struct ubifs_lp_stats *lst); +void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); +void ubifs_dump_lprop(const struct ubifs_info *c, + const struct ubifs_lprops *lp); +void ubifs_dump_lprops(struct ubifs_info *c); +void ubifs_dump_lpt_info(struct ubifs_info *c); +void ubifs_dump_leb(const struct ubifs_info *c, int lnum); +void ubifs_dump_sleb(const struct ubifs_info *c, + const struct ubifs_scan_leb *sleb, int offs); +void ubifs_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode); +void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, + int cat); +void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip); +void ubifs_dump_tnc(struct ubifs_info *c); +void ubifs_dump_index(struct ubifs_info *c); +void ubifs_dump_lpt_lebs(const struct ubifs_info *c); + +int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, + dbg_znode_callback znode_cb, void *priv); + +/* Checking functions */ +void dbg_save_space_info(struct ubifs_info *c); +int dbg_check_space_info(struct ubifs_info *c); +int dbg_check_lprops(struct ubifs_info *c); +int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); +int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); +int dbg_check_cats(struct ubifs_info *c); +int dbg_check_ltab(struct ubifs_info *c); +int dbg_chk_lpt_free_spc(struct ubifs_info *c); +int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); +int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode); +int dbg_check_dir(struct ubifs_info *c, const struct inode *dir); +int dbg_check_tnc(struct ubifs_info *c, int extra); +int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); +int dbg_check_filesystem(struct ubifs_info *c); +void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + int add_pos); +int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + int row, int col); +int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + loff_t size); +int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); +int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); + +int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len); +int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len); +int dbg_leb_unmap(struct ubifs_info *c, int lnum); +int dbg_leb_map(struct ubifs_info *c, int lnum); + +/* Debugfs-related stuff */ +int dbg_debugfs_init(void); +void dbg_debugfs_exit(void); +int dbg_debugfs_init_fs(struct ubifs_info *c); +void dbg_debugfs_exit_fs(struct ubifs_info *c); #endif /* !__UBIFS_DEBUG_H__ */ diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index aae5c65..f87341e 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -4,18 +4,7 @@ * Copyright (C) 2006-2008 Nokia Corporation. * Copyright (C) 2006, 2007 University of Szeged, Hungary * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -31,6 +20,26 @@ * buffer is full or when it is not used for some time (by timer). This is * similar to the mechanism is used by JFFS2. * + * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum + * write size (@c->max_write_size). The latter is the maximum amount of bytes + * the underlying flash is able to program at a time, and writing in + * @c->max_write_size units should presumably be faster. Obviously, + * @c->min_io_size <= @c->max_write_size. Write-buffers are of + * @c->max_write_size bytes in size for maximum performance. However, when a + * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size + * boundary) which contains data is written, not the whole write-buffer, + * because this is more space-efficient. + * + * This optimization adds few complications to the code. Indeed, on the one + * hand, we want to write in optimal @c->max_write_size bytes chunks, which + * also means aligning writes at the @c->max_write_size bytes offsets. On the + * other hand, we do not want to waste space when synchronizing the write + * buffer, so during synchronization we writes in smaller chunks. And this makes + * the next write offset to be not aligned to @c->max_write_size bytes. So the + * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned + * to @c->max_write_size bytes again. We do this by temporarily shrinking + * write-buffer size (@wbuf->size). + * * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by * mutexes defined inside these objects. Since sometimes upper-level code * has to lock the write-buffer (e.g. journal space reservation code), many @@ -46,10 +55,18 @@ * UBIFS uses padding when it pads to the next min. I/O unit. In this case it * uses padding nodes or padding bytes, if the padding node does not fit. * - * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes - * every time they are read from the flash media. + * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when + * they are read from the flash media. */ +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/crc32.h> +#include <linux/slab.h> +#else +#include <linux/compat.h> +#include <linux/err.h> +#endif #include "ubifs.h" /** @@ -59,12 +76,129 @@ */ void ubifs_ro_mode(struct ubifs_info *c, int err) { - if (!c->ro_media) { - c->ro_media = 1; + if (!c->ro_error) { + c->ro_error = 1; c->no_chk_data_crc = 0; + c->vfs_sb->s_flags |= MS_RDONLY; ubifs_warn("switched to read-only mode, error %d", err); - dbg_dump_stack(); + dump_stack(); + } +} + +/* + * Below are simple wrappers over UBI I/O functions which include some + * additional checks and UBIFS debugging stuff. See corresponding UBI function + * for more information. + */ + +int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, + int len, int even_ebadmsg) +{ + int err; + + err = ubi_read(c->ubi, lnum, buf, offs, len); + /* + * In case of %-EBADMSG print the error message only if the + * @even_ebadmsg is true. + */ + if (err && (err != -EBADMSG || even_ebadmsg)) { + ubifs_err("reading %d bytes from LEB %d:%d failed, error %d", + len, lnum, offs, err); + dump_stack(); + } + return err; +} + +int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_write(c->ubi, lnum, buf, offs, len); + else + err = dbg_leb_write(c, lnum, buf, offs, len); + if (err) { + ubifs_err("writing %d bytes to LEB %d:%d failed, error %d", + len, lnum, offs, err); + ubifs_ro_mode(c, err); + dump_stack(); + } + return err; +} + +int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_change(c->ubi, lnum, buf, len); + else + err = dbg_leb_change(c, lnum, buf, len); + if (err) { + ubifs_err("changing %d bytes in LEB %d failed, error %d", + len, lnum, err); + ubifs_ro_mode(c, err); + dump_stack(); } + return err; +} + +int ubifs_leb_unmap(struct ubifs_info *c, int lnum) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_unmap(c->ubi, lnum); + else + err = dbg_leb_unmap(c, lnum); + if (err) { + ubifs_err("unmap LEB %d failed, error %d", lnum, err); + ubifs_ro_mode(c, err); + dump_stack(); + } + return err; +} + +int ubifs_leb_map(struct ubifs_info *c, int lnum) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_map(c->ubi, lnum); + else + err = dbg_leb_map(c, lnum); + if (err) { + ubifs_err("mapping LEB %d failed, error %d", lnum, err); + ubifs_ro_mode(c, err); + dump_stack(); + } + return err; +} + +int ubifs_is_mapped(const struct ubifs_info *c, int lnum) +{ + int err; + + err = ubi_is_mapped(c->ubi, lnum); + if (err < 0) { + ubifs_err("ubi_is_mapped failed for LEB %d, error %d", + lnum, err); + dump_stack(); + } + return err; } /** @@ -85,8 +219,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) * This function may skip data nodes CRC checking if @c->no_chk_data_crc is * true, which is controlled by corresponding UBIFS mount option. However, if * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is - * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is - * ignored and CRC is checked. + * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are + * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC + * is checked. This is because during mounting or re-mounting from R/O mode to + * R/W mode we may read journal nodes (when replying the journal or doing the + * recovery) and the journal nodes may potentially be corrupted, so checking is + * required. * * This function returns zero in case of success and %-EUCLEAN in case of bad * CRC or magic. @@ -128,8 +266,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, node_len > c->ranges[type].max_len) goto out_len; - if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && - c->no_chk_data_crc) + if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting && + !c->remounting_rw && c->no_chk_data_crc) return 0; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); @@ -150,8 +288,8 @@ out_len: out: if (!quiet) { ubifs_err("bad node at LEB %d:%d", lnum, offs); - dbg_dump_node(c, buf); - dbg_dump_stack(); + ubifs_dump_node(c, buf); + dump_stack(); } return err; } @@ -257,6 +395,571 @@ void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) } /** + * ubifs_prep_grp_node - prepare node of a group to be written to flash. + * @c: UBIFS file-system description object + * @node: the node to pad + * @len: node length + * @last: indicates the last node of the group + * + * This function prepares node at @node to be written to the media - it + * calculates node CRC and fills the common header. + */ +void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) +{ + uint32_t crc; + struct ubifs_ch *ch = node; + unsigned long long sqnum = next_sqnum(c); + + ubifs_assert(len >= UBIFS_CH_SZ); + + ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); + ch->len = cpu_to_le32(len); + if (last) + ch->group_type = UBIFS_LAST_OF_NODE_GROUP; + else + ch->group_type = UBIFS_IN_NODE_GROUP; + ch->sqnum = cpu_to_le64(sqnum); + ch->padding[0] = ch->padding[1] = 0; + crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); + ch->crc = cpu_to_le32(crc); +} + +#ifndef __UBOOT__ +/** + * wbuf_timer_callback - write-buffer timer callback function. + * @data: timer data (write-buffer descriptor) + * + * This function is called when the write-buffer timer expires. + */ +static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) +{ + struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); + + dbg_io("jhead %s", dbg_jhead(wbuf->jhead)); + wbuf->need_sync = 1; + wbuf->c->need_wbuf_sync = 1; + ubifs_wake_up_bgt(wbuf->c); + return HRTIMER_NORESTART; +} + +/** + * new_wbuf_timer - start new write-buffer timer. + * @wbuf: write-buffer descriptor + */ +static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) +{ + ubifs_assert(!hrtimer_active(&wbuf->timer)); + + if (wbuf->no_timer) + return; + dbg_io("set timer for jhead %s, %llu-%llu millisecs", + dbg_jhead(wbuf->jhead), + div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC), + div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta, + USEC_PER_SEC)); + hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta, + HRTIMER_MODE_REL); +} +#endif + +/** + * cancel_wbuf_timer - cancel write-buffer timer. + * @wbuf: write-buffer descriptor + */ +static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) +{ + if (wbuf->no_timer) + return; + wbuf->need_sync = 0; +#ifndef __UBOOT__ + hrtimer_cancel(&wbuf->timer); +#endif +} + +/** + * ubifs_wbuf_sync_nolock - synchronize write-buffer. + * @wbuf: write-buffer to synchronize + * + * This function synchronizes write-buffer @buf and returns zero in case of + * success or a negative error code in case of failure. + * + * Note, although write-buffers are of @c->max_write_size, this function does + * not necessarily writes all @c->max_write_size bytes to the flash. Instead, + * if the write-buffer is only partially filled with data, only the used part + * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized. + * This way we waste less space. + */ +int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) +{ + struct ubifs_info *c = wbuf->c; + int err, dirt, sync_len; + + cancel_wbuf_timer_nolock(wbuf); + if (!wbuf->used || wbuf->lnum == -1) + /* Write-buffer is empty or not seeked */ + return 0; + + dbg_io("LEB %d:%d, %d bytes, jhead %s", + wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); + ubifs_assert(!(wbuf->avail & 7)); + ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size); + ubifs_assert(wbuf->size >= c->min_io_size); + ubifs_assert(wbuf->size <= c->max_write_size); + ubifs_assert(wbuf->size % c->min_io_size == 0); + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->leb_size - wbuf->offs >= c->max_write_size) + ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); + + if (c->ro_error) + return -EROFS; + + /* + * Do not write whole write buffer but write only the minimum necessary + * amount of min. I/O units. + */ + sync_len = ALIGN(wbuf->used, c->min_io_size); + dirt = sync_len - wbuf->used; + if (dirt) + ubifs_pad(c, wbuf->buf + wbuf->used, dirt); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len); + if (err) + return err; + + spin_lock(&wbuf->lock); + wbuf->offs += sync_len; + /* + * Now @wbuf->offs is not necessarily aligned to @c->max_write_size. + * But our goal is to optimize writes and make sure we write in + * @c->max_write_size chunks and to @c->max_write_size-aligned offset. + * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make + * sure that @wbuf->offs + @wbuf->size is aligned to + * @c->max_write_size. This way we make sure that after next + * write-buffer flush we are again at the optimal offset (aligned to + * @c->max_write_size). + */ + if (c->leb_size - wbuf->offs < c->max_write_size) + wbuf->size = c->leb_size - wbuf->offs; + else if (wbuf->offs & (c->max_write_size - 1)) + wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; + else + wbuf->size = c->max_write_size; + wbuf->avail = wbuf->size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + + if (wbuf->sync_callback) + err = wbuf->sync_callback(c, wbuf->lnum, + c->leb_size - wbuf->offs, dirt); + return err; +} + +/** + * ubifs_wbuf_seek_nolock - seek write-buffer. + * @wbuf: write-buffer + * @lnum: logical eraseblock number to seek to + * @offs: logical eraseblock offset to seek to + * + * This function targets the write-buffer to logical eraseblock @lnum:@offs. + * The write-buffer has to be empty. Returns zero in case of success and a + * negative error code in case of failure. + */ +int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs) +{ + const struct ubifs_info *c = wbuf->c; + + dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead)); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); + ubifs_assert(offs >= 0 && offs <= c->leb_size); + ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); + ubifs_assert(lnum != wbuf->lnum); + ubifs_assert(wbuf->used == 0); + + spin_lock(&wbuf->lock); + wbuf->lnum = lnum; + wbuf->offs = offs; + if (c->leb_size - wbuf->offs < c->max_write_size) + wbuf->size = c->leb_size - wbuf->offs; + else if (wbuf->offs & (c->max_write_size - 1)) + wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; + else + wbuf->size = c->max_write_size; + wbuf->avail = wbuf->size; + wbuf->used = 0; + spin_unlock(&wbuf->lock); + + return 0; +} + +#ifndef __UBOOT__ +/** + * ubifs_bg_wbufs_sync - synchronize write-buffers. + * @c: UBIFS file-system description object + * + * This function is called by background thread to synchronize write-buffers. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_bg_wbufs_sync(struct ubifs_info *c) +{ + int err, i; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (!c->need_wbuf_sync) + return 0; + c->need_wbuf_sync = 0; + + if (c->ro_error) { + err = -EROFS; + goto out_timers; + } + + dbg_io("synchronize"); + for (i = 0; i < c->jhead_cnt; i++) { + struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; + + cond_resched(); + + /* + * If the mutex is locked then wbuf is being changed, so + * synchronization is not necessary. + */ + if (mutex_is_locked(&wbuf->io_mutex)) + continue; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + if (!wbuf->need_sync) { + mutex_unlock(&wbuf->io_mutex); + continue; + } + + err = ubifs_wbuf_sync_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + if (err) { + ubifs_err("cannot sync write-buffer, error %d", err); + ubifs_ro_mode(c, err); + goto out_timers; + } + } + + return 0; + +out_timers: + /* Cancel all timers to prevent repeated errors */ + for (i = 0; i < c->jhead_cnt; i++) { + struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + cancel_wbuf_timer_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + } + return err; +} + +/** + * ubifs_wbuf_write_nolock - write data to flash via write-buffer. + * @wbuf: write-buffer + * @buf: node to write + * @len: node length + * + * This function writes data to flash via write-buffer @wbuf. This means that + * the last piece of the node won't reach the flash media immediately if it + * does not take whole max. write unit (@c->max_write_size). Instead, the node + * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or + * because more data are appended to the write-buffer). + * + * This function returns zero in case of success and a negative error code in + * case of failure. If the node cannot be written because there is no more + * space in this logical eraseblock, %-ENOSPC is returned. + */ +int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) +{ + struct ubifs_info *c = wbuf->c; + int err, written, n, aligned_len = ALIGN(len, 8); + + dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, + dbg_ntype(((struct ubifs_ch *)buf)->node_type), + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used); + ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); + ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); + ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); + ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size); + ubifs_assert(wbuf->size >= c->min_io_size); + ubifs_assert(wbuf->size <= c->max_write_size); + ubifs_assert(wbuf->size % c->min_io_size == 0); + ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); + ubifs_assert(!c->ro_media && !c->ro_mount); + ubifs_assert(!c->space_fixup); + if (c->leb_size - wbuf->offs >= c->max_write_size) + ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); + + if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { + err = -ENOSPC; + goto out; + } + + cancel_wbuf_timer_nolock(wbuf); + + if (c->ro_error) + return -EROFS; + + if (aligned_len <= wbuf->avail) { + /* + * The node is not very large and fits entirely within + * write-buffer. + */ + memcpy(wbuf->buf + wbuf->used, buf, len); + + if (aligned_len == wbuf->avail) { + dbg_io("flush jhead %s wbuf to LEB %d:%d", + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, + wbuf->offs, wbuf->size); + if (err) + goto out; + + spin_lock(&wbuf->lock); + wbuf->offs += wbuf->size; + if (c->leb_size - wbuf->offs >= c->max_write_size) + wbuf->size = c->max_write_size; + else + wbuf->size = c->leb_size - wbuf->offs; + wbuf->avail = wbuf->size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + } else { + spin_lock(&wbuf->lock); + wbuf->avail -= aligned_len; + wbuf->used += aligned_len; + spin_unlock(&wbuf->lock); + } + + goto exit; + } + + written = 0; + + if (wbuf->used) { + /* + * The node is large enough and does not fit entirely within + * current available space. We have to fill and flush + * write-buffer and switch to the next max. write unit. + */ + dbg_io("flush jhead %s wbuf to LEB %d:%d", + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); + memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, + wbuf->size); + if (err) + goto out; + + wbuf->offs += wbuf->size; + len -= wbuf->avail; + aligned_len -= wbuf->avail; + written += wbuf->avail; + } else if (wbuf->offs & (c->max_write_size - 1)) { + /* + * The write-buffer offset is not aligned to + * @c->max_write_size and @wbuf->size is less than + * @c->max_write_size. Write @wbuf->size bytes to make sure the + * following writes are done in optimal @c->max_write_size + * chunks. + */ + dbg_io("write %d bytes to LEB %d:%d", + wbuf->size, wbuf->lnum, wbuf->offs); + err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs, + wbuf->size); + if (err) + goto out; + + wbuf->offs += wbuf->size; + len -= wbuf->size; + aligned_len -= wbuf->size; + written += wbuf->size; + } + + /* + * The remaining data may take more whole max. write units, so write the + * remains multiple to max. write unit size directly to the flash media. + * We align node length to 8-byte boundary because we anyway flash wbuf + * if the remaining space is less than 8 bytes. + */ + n = aligned_len >> c->max_write_shift; + if (n) { + n <<= c->max_write_shift; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, + wbuf->offs); + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, n); + if (err) + goto out; + wbuf->offs += n; + aligned_len -= n; + len -= n; + written += n; + } + + spin_lock(&wbuf->lock); + if (aligned_len) + /* + * And now we have what's left and what does not take whole + * max. write unit, so write it to the write-buffer and we are + * done. + */ + memcpy(wbuf->buf, buf + written, len); + + if (c->leb_size - wbuf->offs >= c->max_write_size) + wbuf->size = c->max_write_size; + else + wbuf->size = c->leb_size - wbuf->offs; + wbuf->avail = wbuf->size - aligned_len; + wbuf->used = aligned_len; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + +exit: + if (wbuf->sync_callback) { + int free = c->leb_size - wbuf->offs - wbuf->used; + + err = wbuf->sync_callback(c, wbuf->lnum, free, 0); + if (err) + goto out; + } + + if (wbuf->used) + new_wbuf_timer_nolock(wbuf); + + return 0; + +out: + ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", + len, wbuf->lnum, wbuf->offs, err); + ubifs_dump_node(c, buf); + dump_stack(); + ubifs_dump_leb(c, wbuf->lnum); + return err; +} + +/** + * ubifs_write_node - write node to the media. + * @c: UBIFS file-system description object + * @buf: the node to write + * @len: node length + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * + * This function automatically fills node magic number, assigns sequence + * number, and calculates node CRC checksum. The length of the @buf buffer has + * to be aligned to the minimal I/O unit size. This function automatically + * appends padding node and padding bytes if needed. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, + int offs) +{ + int err, buf_len = ALIGN(len, c->min_io_size); + + dbg_io("LEB %d:%d, %s, length %d (aligned %d)", + lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len, + buf_len); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); + ubifs_assert(!c->ro_media && !c->ro_mount); + ubifs_assert(!c->space_fixup); + + if (c->ro_error) + return -EROFS; + + ubifs_prepare_node(c, buf, len, 1); + err = ubifs_leb_write(c, lnum, buf, offs, buf_len); + if (err) + ubifs_dump_node(c, buf); + + return err; +} +#endif + +/** + * ubifs_read_node_wbuf - read node from the media or write-buffer. + * @wbuf: wbuf to check for un-written data + * @buf: buffer to read to + * @type: node type + * @len: node length + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * + * This function reads a node of known type and length, checks it and stores + * in @buf. If the node partially or fully sits in the write-buffer, this + * function takes data from the buffer, otherwise it reads the flash media. + * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative + * error code in case of failure. + */ +int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, + int lnum, int offs) +{ + const struct ubifs_info *c = wbuf->c; + int err, rlen, overlap; + struct ubifs_ch *ch = buf; + + dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs, + dbg_ntype(type), len, dbg_jhead(wbuf->jhead)); + ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); + + spin_lock(&wbuf->lock); + overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); + if (!overlap) { + /* We may safely unlock the write-buffer and read the data */ + spin_unlock(&wbuf->lock); + return ubifs_read_node(c, buf, type, len, lnum, offs); + } + + /* Don't read under wbuf */ + rlen = wbuf->offs - offs; + if (rlen < 0) + rlen = 0; + + /* Copy the rest from the write-buffer */ + memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); + spin_unlock(&wbuf->lock); + + if (rlen > 0) { + /* Read everything that goes before write-buffer */ + err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0); + if (err && err != -EBADMSG) + return err; + } + + if (type != ch->node_type) { + ubifs_err("bad node type (%d but expected %d)", + ch->node_type, type); + goto out; + } + + err = ubifs_check_node(c, buf, lnum, offs, 0, 0); + if (err) { + ubifs_err("expected node type %d", type); + return err; + } + + rlen = le32_to_cpu(ch->len); + if (rlen != len) { + ubifs_err("bad node length %d, expected %d", rlen, len); + goto out; + } + + return 0; + +out: + ubifs_err("bad node at LEB %d:%d", lnum, offs); + ubifs_dump_node(c, buf); + dump_stack(); + return -EINVAL; +} + +/** * ubifs_read_node - read node. * @c: UBIFS file-system description object * @buf: buffer to read to @@ -281,12 +984,9 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, ubifs_assert(!(offs & 7) && offs < c->leb_size); ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); - err = ubi_read(c->ubi, lnum, buf, offs, len); - if (err && err != -EBADMSG) { - ubifs_err("cannot read node %d from LEB %d:%d, error %d", - type, lnum, offs, err); + err = ubifs_leb_read(c, lnum, buf, offs, len, 0); + if (err && err != -EBADMSG) return err; - } if (type != ch->node_type) { ubifs_err("bad node type (%d but expected %d)", @@ -309,8 +1009,143 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, return 0; out: - ubifs_err("bad node at LEB %d:%d", lnum, offs); - dbg_dump_node(c, buf); - dbg_dump_stack(); + ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, + ubi_is_mapped(c->ubi, lnum)); + ubifs_dump_node(c, buf); + dump_stack(); return -EINVAL; } + +/** + * ubifs_wbuf_init - initialize write-buffer. + * @c: UBIFS file-system description object + * @wbuf: write-buffer to initialize + * + * This function initializes write-buffer. Returns zero in case of success + * %-ENOMEM in case of failure. + */ +int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) +{ + size_t size; + + wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL); + if (!wbuf->buf) + return -ENOMEM; + + size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); + wbuf->inodes = kmalloc(size, GFP_KERNEL); + if (!wbuf->inodes) { + kfree(wbuf->buf); + wbuf->buf = NULL; + return -ENOMEM; + } + + wbuf->used = 0; + wbuf->lnum = wbuf->offs = -1; + /* + * If the LEB starts at the max. write size aligned address, then + * write-buffer size has to be set to @c->max_write_size. Otherwise, + * set it to something smaller so that it ends at the closest max. + * write size boundary. + */ + size = c->max_write_size - (c->leb_start % c->max_write_size); + wbuf->avail = wbuf->size = size; + wbuf->sync_callback = NULL; + mutex_init(&wbuf->io_mutex); + spin_lock_init(&wbuf->lock); + wbuf->c = c; + wbuf->next_ino = 0; + +#ifndef __UBOOT__ + hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + wbuf->timer.function = wbuf_timer_callback_nolock; + wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0); + wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT; + wbuf->delta *= 1000000000ULL; + ubifs_assert(wbuf->delta <= ULONG_MAX); +#endif + return 0; +} + +/** + * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. + * @wbuf: the write-buffer where to add + * @inum: the inode number + * + * This function adds an inode number to the inode array of the write-buffer. + */ +void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum) +{ + if (!wbuf->buf) + /* NOR flash or something similar */ + return; + + spin_lock(&wbuf->lock); + if (wbuf->used) + wbuf->inodes[wbuf->next_ino++] = inum; + spin_unlock(&wbuf->lock); +} + +/** + * wbuf_has_ino - returns if the wbuf contains data from the inode. + * @wbuf: the write-buffer + * @inum: the inode number + * + * This function returns with %1 if the write-buffer contains some data from the + * given inode otherwise it returns with %0. + */ +static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum) +{ + int i, ret = 0; + + spin_lock(&wbuf->lock); + for (i = 0; i < wbuf->next_ino; i++) + if (inum == wbuf->inodes[i]) { + ret = 1; + break; + } + spin_unlock(&wbuf->lock); + + return ret; +} + +/** + * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode. + * @c: UBIFS file-system description object + * @inode: inode to synchronize + * + * This function synchronizes write-buffers which contain nodes belonging to + * @inode. Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode) +{ + int i, err = 0; + + for (i = 0; i < c->jhead_cnt; i++) { + struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; + + if (i == GCHD) + /* + * GC head is special, do not look at it. Even if the + * head contains something related to this inode, it is + * a _copy_ of corresponding on-flash node which sits + * somewhere else. + */ + continue; + + if (!wbuf_has_ino(wbuf, inode->i_ino)) + continue; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + if (wbuf_has_ino(wbuf, inode->i_ino)) + err = ubifs_wbuf_sync_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + + if (err) { + ubifs_ro_mode(c, err); + return err; + } + } + return 0; +} diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index efb3430..b5c4884 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -229,23 +218,6 @@ static inline void xent_key_init(const struct ubifs_info *c, } /** - * xent_key_init_hash - initialize extended attribute entry key without - * re-calculating hash function. - * @c: UBIFS file-system description object - * @key: key to initialize - * @inum: host inode number - * @hash: extended attribute entry name hash - */ -static inline void xent_key_init_hash(const struct ubifs_info *c, - union ubifs_key *key, ino_t inum, - uint32_t hash) -{ - ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); - key->u32[0] = inum; - key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); -} - -/** * xent_key_init_flash - initialize on-flash extended attribute entry key. * @c: UBIFS file-system description object * @k: key to initialize @@ -295,22 +267,15 @@ static inline void data_key_init(const struct ubifs_info *c, } /** - * data_key_init_flash - initialize on-flash data key. + * highest_data_key - get the highest possible data key for an inode. * @c: UBIFS file-system description object - * @k: key to initialize + * @key: key to initialize * @inum: inode number - * @block: block number */ -static inline void data_key_init_flash(const struct ubifs_info *c, void *k, - ino_t inum, unsigned int block) +static inline void highest_data_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) { - union ubifs_key *key = k; - - ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); - key->j32[0] = cpu_to_le32(inum); - key->j32[1] = cpu_to_le32(block | - (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); - memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); + data_key_init(c, key, inum, UBIFS_S_KEY_BLOCK_MASK); } /** @@ -330,6 +295,20 @@ static inline void trun_key_init(const struct ubifs_info *c, } /** + * invalid_key_init - initialize invalid node key. + * @c: UBIFS file-system description object + * @key: key to initialize + * + * This is a helper function which marks a @key object as invalid. + */ +static inline void invalid_key_init(const struct ubifs_info *c, + union ubifs_key *key) +{ + key->u32[0] = 0xDEADBEAF; + key->u32[1] = UBIFS_INVALID_KEY; +} + +/** * key_type - get key type. * @c: UBIFS file-system description object * @key: key to get type of @@ -381,8 +360,8 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) * @c: UBIFS file-system description object * @key: the key to get hash from */ -static inline int key_hash(const struct ubifs_info *c, - const union ubifs_key *key) +static inline uint32_t key_hash(const struct ubifs_info *c, + const union ubifs_key *key) { return key->u32[1] & UBIFS_S_KEY_HASH_MASK; } @@ -392,7 +371,7 @@ static inline int key_hash(const struct ubifs_info *c, * @c: UBIFS file-system description object * @k: the key to get hash from */ -static inline int key_hash_flash(const struct ubifs_info *c, const void *k) +static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k) { const union ubifs_key *key = k; @@ -554,4 +533,5 @@ static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) return 0; } } + #endif /* !__UBIFS_KEY_H__ */ diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 68a9bd9..ced0424 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -27,8 +16,14 @@ * journal. */ +#define __UBOOT__ +#ifdef __UBOOT__ +#include <linux/err.h> +#endif #include "ubifs.h" +static int dbg_check_bud_bytes(struct ubifs_info *c); + /** * ubifs_search_bud - search bud LEB. * @c: UBIFS file-system description object @@ -60,6 +55,57 @@ struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum) } /** + * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number to search + * + * This functions returns the wbuf for @lnum or %NULL if there is not one. + */ +struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum) +{ + struct rb_node *p; + struct ubifs_bud *bud; + int jhead; + + if (!c->jheads) + return NULL; + + spin_lock(&c->buds_lock); + p = c->buds.rb_node; + while (p) { + bud = rb_entry(p, struct ubifs_bud, rb); + if (lnum < bud->lnum) + p = p->rb_left; + else if (lnum > bud->lnum) + p = p->rb_right; + else { + jhead = bud->jhead; + spin_unlock(&c->buds_lock); + return &c->jheads[jhead].wbuf; + } + } + spin_unlock(&c->buds_lock); + return NULL; +} + +/** + * empty_log_bytes - calculate amount of empty space in the log. + * @c: UBIFS file-system description object + */ +static inline long long empty_log_bytes(const struct ubifs_info *c) +{ + long long h, t; + + h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs; + t = (long long)c->ltail_lnum * c->leb_size; + + if (h >= t) + return c->log_bytes - h + t; + else + return t - h; +} + +/** * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list. * @c: UBIFS file-system description object * @bud: the bud to add @@ -88,7 +134,7 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) jhead = &c->jheads[bud->jhead]; list_add_tail(&bud->list, &jhead->buds_list); } else - ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); + ubifs_assert(c->replaying && c->ro_mount); /* * Note, although this is a new bud, we anyway account this space now, @@ -98,7 +144,594 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) */ c->bud_bytes += c->leb_size - bud->start; - dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, - bud->start, bud->jhead, c->bud_bytes); + dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum, + bud->start, dbg_jhead(bud->jhead), c->bud_bytes); + spin_unlock(&c->buds_lock); +} + +/** + * ubifs_add_bud_to_log - add a new bud to the log. + * @c: UBIFS file-system description object + * @jhead: journal head the bud belongs to + * @lnum: LEB number of the bud + * @offs: starting offset of the bud + * + * This function writes reference node for the new bud LEB @lnum it to the log, + * and adds it to the buds tress. It also makes sure that log size does not + * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success, + * %-EAGAIN if commit is required, and a negative error codes in case of + * failure. + */ +int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) +{ + int err; + struct ubifs_bud *bud; + struct ubifs_ref_node *ref; + + bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS); + if (!bud) + return -ENOMEM; + ref = kzalloc(c->ref_node_alsz, GFP_NOFS); + if (!ref) { + kfree(bud); + return -ENOMEM; + } + + mutex_lock(&c->log_mutex); + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) { + err = -EROFS; + goto out_unlock; + } + + /* Make sure we have enough space in the log */ + if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) { + dbg_log("not enough log space - %lld, required %d", + empty_log_bytes(c), c->min_log_bytes); + ubifs_commit_required(c); + err = -EAGAIN; + goto out_unlock; + } + + /* + * Make sure the amount of space in buds will not exceed the + * 'c->max_bud_bytes' limit, because we want to guarantee mount time + * limits. + * + * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes + * because we are holding @c->log_mutex. All @c->bud_bytes take place + * when both @c->log_mutex and @c->bud_bytes are locked. + */ + if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) { + dbg_log("bud bytes %lld (%lld max), require commit", + c->bud_bytes, c->max_bud_bytes); + ubifs_commit_required(c); + err = -EAGAIN; + goto out_unlock; + } + + /* + * If the journal is full enough - start background commit. Note, it is + * OK to read 'c->cmt_state' without spinlock because integer reads + * are atomic in the kernel. + */ + if (c->bud_bytes >= c->bg_bud_bytes && + c->cmt_state == COMMIT_RESTING) { + dbg_log("bud bytes %lld (%lld max), initiate BG commit", + c->bud_bytes, c->max_bud_bytes); + ubifs_request_bg_commit(c); + } + + bud->lnum = lnum; + bud->start = offs; + bud->jhead = jhead; + + ref->ch.node_type = UBIFS_REF_NODE; + ref->lnum = cpu_to_le32(bud->lnum); + ref->offs = cpu_to_le32(bud->start); + ref->jhead = cpu_to_le32(jhead); + + if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + + if (c->lhead_offs == 0) { + /* Must ensure next log LEB has been unmapped */ + err = ubifs_leb_unmap(c, c->lhead_lnum); + if (err) + goto out_unlock; + } + + if (bud->start == 0) { + /* + * Before writing the LEB reference which refers an empty LEB + * to the log, we have to make sure it is mapped, because + * otherwise we'd risk to refer an LEB with garbage in case of + * an unclean reboot, because the target LEB might have been + * unmapped, but not yet physically erased. + */ + err = ubifs_leb_map(c, bud->lnum); + if (err) + goto out_unlock; + } + + dbg_log("write ref LEB %d:%d", + c->lhead_lnum, c->lhead_offs); + err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum, + c->lhead_offs); + if (err) + goto out_unlock; + + c->lhead_offs += c->ref_node_alsz; + + ubifs_add_bud(c, bud); + + mutex_unlock(&c->log_mutex); + kfree(ref); + return 0; + +out_unlock: + mutex_unlock(&c->log_mutex); + kfree(ref); + kfree(bud); + return err; +} + +/** + * remove_buds - remove used buds. + * @c: UBIFS file-system description object + * + * This function removes use buds from the buds tree. It does not remove the + * buds which are pointed to by journal heads. + */ +static void remove_buds(struct ubifs_info *c) +{ + struct rb_node *p; + + ubifs_assert(list_empty(&c->old_buds)); + c->cmt_bud_bytes = 0; + spin_lock(&c->buds_lock); + p = rb_first(&c->buds); + while (p) { + struct rb_node *p1 = p; + struct ubifs_bud *bud; + struct ubifs_wbuf *wbuf; + + p = rb_next(p); + bud = rb_entry(p1, struct ubifs_bud, rb); + wbuf = &c->jheads[bud->jhead].wbuf; + + if (wbuf->lnum == bud->lnum) { + /* + * Do not remove buds which are pointed to by journal + * heads (non-closed buds). + */ + c->cmt_bud_bytes += wbuf->offs - bud->start; + dbg_log("preserve %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld", + bud->lnum, bud->start, dbg_jhead(bud->jhead), + wbuf->offs - bud->start, c->cmt_bud_bytes); + bud->start = wbuf->offs; + } else { + c->cmt_bud_bytes += c->leb_size - bud->start; + dbg_log("remove %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld", + bud->lnum, bud->start, dbg_jhead(bud->jhead), + c->leb_size - bud->start, c->cmt_bud_bytes); + rb_erase(p1, &c->buds); + /* + * If the commit does not finish, the recovery will need + * to replay the journal, in which case the old buds + * must be unchanged. Do not release them until post + * commit i.e. do not allow them to be garbage + * collected. + */ + list_move(&bud->list, &c->old_buds); + } + } + spin_unlock(&c->buds_lock); +} + +/** + * ubifs_log_start_commit - start commit. + * @c: UBIFS file-system description object + * @ltail_lnum: return new log tail LEB number + * + * The commit operation starts with writing "commit start" node to the log and + * reference nodes for all journal heads which will define new journal after + * the commit has been finished. The commit start and reference nodes are + * written in one go to the nearest empty log LEB (hence, when commit is + * finished UBIFS may safely unmap all the previous log LEBs). This function + * returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) +{ + void *buf; + struct ubifs_cs_node *cs; + struct ubifs_ref_node *ref; + int err, i, max_len, len; + + err = dbg_check_bud_bytes(c); + if (err) + return err; + + max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ; + max_len = ALIGN(max_len, c->min_io_size); + buf = cs = kmalloc(max_len, GFP_NOFS); + if (!buf) + return -ENOMEM; + + cs->ch.node_type = UBIFS_CS_NODE; + cs->cmt_no = cpu_to_le64(c->cmt_no); + ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0); + + /* + * Note, we do not lock 'c->log_mutex' because this is the commit start + * phase and we are exclusively using the log. And we do not lock + * write-buffer because nobody can write to the file-system at this + * phase. + */ + + len = UBIFS_CS_NODE_SZ; + for (i = 0; i < c->jhead_cnt; i++) { + int lnum = c->jheads[i].wbuf.lnum; + int offs = c->jheads[i].wbuf.offs; + + if (lnum == -1 || offs == c->leb_size) + continue; + + dbg_log("add ref to LEB %d:%d for jhead %s", + lnum, offs, dbg_jhead(i)); + ref = buf + len; + ref->ch.node_type = UBIFS_REF_NODE; + ref->lnum = cpu_to_le32(lnum); + ref->offs = cpu_to_le32(offs); + ref->jhead = cpu_to_le32(i); + + ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0); + len += UBIFS_REF_NODE_SZ; + } + + ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len); + + /* Switch to the next log LEB */ + if (c->lhead_offs) { + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + + if (c->lhead_offs == 0) { + /* Must ensure next LEB has been unmapped */ + err = ubifs_leb_unmap(c, c->lhead_lnum); + if (err) + goto out; + } + + len = ALIGN(len, c->min_io_size); + dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len); + err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len); + if (err) + goto out; + + *ltail_lnum = c->lhead_lnum; + + c->lhead_offs += len; + if (c->lhead_offs == c->leb_size) { + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + + remove_buds(c); + + /* + * We have started the commit and now users may use the rest of the log + * for new writes. + */ + c->min_log_bytes = 0; + +out: + kfree(buf); + return err; +} + +/** + * ubifs_log_end_commit - end commit. + * @c: UBIFS file-system description object + * @ltail_lnum: new log tail LEB number + * + * This function is called on when the commit operation was finished. It + * moves log tail to new position and unmaps LEBs which contain obsolete data. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum) +{ + int err; + + /* + * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS + * writes during commit. Its only short "commit" start phase when + * writers are blocked. + */ + mutex_lock(&c->log_mutex); + + dbg_log("old tail was LEB %d:0, new tail is LEB %d:0", + c->ltail_lnum, ltail_lnum); + + c->ltail_lnum = ltail_lnum; + /* + * The commit is finished and from now on it must be guaranteed that + * there is always enough space for the next commit. + */ + c->min_log_bytes = c->leb_size; + + spin_lock(&c->buds_lock); + c->bud_bytes -= c->cmt_bud_bytes; + spin_unlock(&c->buds_lock); + + err = dbg_check_bud_bytes(c); + + mutex_unlock(&c->log_mutex); + return err; +} + +/** + * ubifs_log_post_commit - things to do after commit is completed. + * @c: UBIFS file-system description object + * @old_ltail_lnum: old log tail LEB number + * + * Release buds only after commit is completed, because they must be unchanged + * if recovery is needed. + * + * Unmap log LEBs only after commit is completed, because they may be needed for + * recovery. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum) +{ + int lnum, err = 0; + + while (!list_empty(&c->old_buds)) { + struct ubifs_bud *bud; + + bud = list_entry(c->old_buds.next, struct ubifs_bud, list); + err = ubifs_return_leb(c, bud->lnum); + if (err) + return err; + list_del(&bud->list); + kfree(bud); + } + mutex_lock(&c->log_mutex); + for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; + lnum = ubifs_next_log_lnum(c, lnum)) { + dbg_log("unmap log LEB %d", lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) + goto out; + } +out: + mutex_unlock(&c->log_mutex); + return err; +} + +/** + * struct done_ref - references that have been done. + * @rb: rb-tree node + * @lnum: LEB number + */ +struct done_ref { + struct rb_node rb; + int lnum; +}; + +/** + * done_already - determine if a reference has been done already. + * @done_tree: rb-tree to store references that have been done + * @lnum: LEB number of reference + * + * This function returns %1 if the reference has been done, %0 if not, otherwise + * a negative error code is returned. + */ +static int done_already(struct rb_root *done_tree, int lnum) +{ + struct rb_node **p = &done_tree->rb_node, *parent = NULL; + struct done_ref *dr; + + while (*p) { + parent = *p; + dr = rb_entry(parent, struct done_ref, rb); + if (lnum < dr->lnum) + p = &(*p)->rb_left; + else if (lnum > dr->lnum) + p = &(*p)->rb_right; + else + return 1; + } + + dr = kzalloc(sizeof(struct done_ref), GFP_NOFS); + if (!dr) + return -ENOMEM; + + dr->lnum = lnum; + + rb_link_node(&dr->rb, parent, p); + rb_insert_color(&dr->rb, done_tree); + + return 0; +} + +/** + * destroy_done_tree - destroy the done tree. + * @done_tree: done tree to destroy + */ +static void destroy_done_tree(struct rb_root *done_tree) +{ + struct done_ref *dr, *n; + + rbtree_postorder_for_each_entry_safe(dr, n, done_tree, rb) + kfree(dr); +} + +/** + * add_node - add a node to the consolidated log. + * @c: UBIFS file-system description object + * @buf: buffer to which to add + * @lnum: LEB number to which to write is passed and returned here + * @offs: offset to where to write is passed and returned here + * @node: node to add + * + * This function returns %0 on success and a negative error code on failure. + */ +static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs, + void *node) +{ + struct ubifs_ch *ch = node; + int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs; + + if (len > remains) { + int sz = ALIGN(*offs, c->min_io_size), err; + + ubifs_pad(c, buf + *offs, sz - *offs); + err = ubifs_leb_change(c, *lnum, buf, sz); + if (err) + return err; + *lnum = ubifs_next_log_lnum(c, *lnum); + *offs = 0; + } + memcpy(buf + *offs, node, len); + *offs += ALIGN(len, 8); + return 0; +} + +/** + * ubifs_consolidate_log - consolidate the log. + * @c: UBIFS file-system description object + * + * Repeated failed commits could cause the log to be full, but at least 1 LEB is + * needed for commit. This function rewrites the reference nodes in the log + * omitting duplicates, and failed CS nodes, and leaving no gaps. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_consolidate_log(struct ubifs_info *c) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + struct rb_root done_tree = RB_ROOT; + int lnum, err, first = 1, write_lnum, offs = 0; + void *buf; + + dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum, + c->lhead_lnum); + buf = vmalloc(c->leb_size); + if (!buf) + return -ENOMEM; + lnum = c->ltail_lnum; + write_lnum = lnum; + while (1) { + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + goto out_free; + } + list_for_each_entry(snod, &sleb->nodes, list) { + switch (snod->type) { + case UBIFS_REF_NODE: { + struct ubifs_ref_node *ref = snod->node; + int ref_lnum = le32_to_cpu(ref->lnum); + + err = done_already(&done_tree, ref_lnum); + if (err < 0) + goto out_scan; + if (err != 1) { + err = add_node(c, buf, &write_lnum, + &offs, snod->node); + if (err) + goto out_scan; + } + break; + } + case UBIFS_CS_NODE: + if (!first) + break; + err = add_node(c, buf, &write_lnum, &offs, + snod->node); + if (err) + goto out_scan; + first = 0; + break; + } + } + ubifs_scan_destroy(sleb); + if (lnum == c->lhead_lnum) + break; + lnum = ubifs_next_log_lnum(c, lnum); + } + if (offs) { + int sz = ALIGN(offs, c->min_io_size); + + ubifs_pad(c, buf + offs, sz - offs); + err = ubifs_leb_change(c, write_lnum, buf, sz); + if (err) + goto out_free; + offs = ALIGN(offs, c->min_io_size); + } + destroy_done_tree(&done_tree); + vfree(buf); + if (write_lnum == c->lhead_lnum) { + ubifs_err("log is too full"); + return -EINVAL; + } + /* Unmap remaining LEBs */ + lnum = write_lnum; + do { + lnum = ubifs_next_log_lnum(c, lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } while (lnum != c->lhead_lnum); + c->lhead_lnum = write_lnum; + c->lhead_offs = offs; + dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs); + return 0; + +out_scan: + ubifs_scan_destroy(sleb); +out_free: + destroy_done_tree(&done_tree); + vfree(buf); + return err; +} + +/** + * dbg_check_bud_bytes - make sure bud bytes calculation are all right. + * @c: UBIFS file-system description object + * + * This function makes sure the amount of flash space used by closed buds + * ('c->bud_bytes' is correct). Returns zero in case of success and %-EINVAL in + * case of failure. + */ +static int dbg_check_bud_bytes(struct ubifs_info *c) +{ + int i, err = 0; + struct ubifs_bud *bud; + long long bud_bytes = 0; + + if (!dbg_is_chk_gen(c)) + return 0; + + spin_lock(&c->buds_lock); + for (i = 0; i < c->jhead_cnt; i++) + list_for_each_entry(bud, &c->jheads[i].buds_list, list) + bud_bytes += c->leb_size - bud->start; + + if (c->bud_bytes != bud_bytes) { + ubifs_err("bad bud_bytes %lld, calculated %lld", + c->bud_bytes, bud_bytes); + err = -EINVAL; + } spin_unlock(&c->buds_lock); + + return err; } diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 8ce4949..fc6686b 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) @@ -28,6 +17,10 @@ * an empty LEB for the journal, or a very dirty LEB for garbage collection. */ +#define __UBOOT__ +#ifdef __UBOOT__ +#include <linux/err.h> +#endif #include "ubifs.h" /** @@ -281,7 +274,7 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, case LPROPS_FREE: if (add_to_lpt_heap(c, lprops, cat)) break; - /* No more room on heap so make it uncategorized */ + /* No more room on heap so make it un-categorized */ cat = LPROPS_UNCAT; /* Fall through */ case LPROPS_UNCAT: @@ -300,8 +293,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, default: ubifs_assert(0); } + lprops->flags &= ~LPROPS_CAT_MASK; lprops->flags |= cat; + c->in_a_category_cnt += 1; + ubifs_assert(c->in_a_category_cnt <= c->main_lebs); } /** @@ -334,6 +330,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c, default: ubifs_assert(0); } + + c->in_a_category_cnt -= 1; + ubifs_assert(c->in_a_category_cnt >= 0); } /** @@ -375,8 +374,8 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, * @lprops: LEB properties * * A LEB may have fallen off of the bottom of a heap, and ended up as - * uncategorized even though it has enough space for us now. If that is the case - * this function will put the LEB back onto a heap. + * un-categorized even though it has enough space for us now. If that is the + * case this function will put the LEB back onto a heap. */ void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) { @@ -436,10 +435,10 @@ int ubifs_categorize_lprops(const struct ubifs_info *c, /** * change_category - change LEB properties category. * @c: UBIFS file-system description object - * @lprops: LEB properties to recategorize + * @lprops: LEB properties to re-categorize * * LEB properties are categorized to enable fast find operations. When the LEB - * properties change they must be recategorized. + * properties change they must be re-categorized. */ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) { @@ -447,7 +446,7 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) int new_cat = ubifs_categorize_lprops(c, lprops); if (old_cat == new_cat) { - struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1]; + struct ubifs_lpt_heap *heap; /* lprops on a heap now must be moved up or down */ if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT) @@ -461,21 +460,18 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) } /** - * calc_dark - calculate LEB dark space size. + * ubifs_calc_dark - calculate LEB dark space size. * @c: the UBIFS file-system description object * @spc: amount of free and dirty space in the LEB * - * This function calculates amount of dark space in an LEB which has @spc bytes - * of free and dirty space. Returns the calculations result. + * This function calculates and returns amount of dark space in an LEB which + * has @spc bytes of free and dirty space. * - * Dark space is the space which is not always usable - it depends on which - * nodes are written in which order. E.g., if an LEB has only 512 free bytes, - * it is dark space, because it cannot fit a large data node. So UBIFS cannot - * count on this LEB and treat these 512 bytes as usable because it is not true - * if, for example, only big chunks of uncompressible data will be written to - * the FS. + * UBIFS is trying to account the space which might not be usable, and this + * space is called "dark space". For example, if an LEB has only %512 free + * bytes, it is dark space, because it cannot fit a large data node. */ -static int calc_dark(struct ubifs_info *c, int spc) +int ubifs_calc_dark(const struct ubifs_info *c, int spc) { ubifs_assert(!(spc & 7)); @@ -507,7 +503,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) pnode = (struct ubifs_pnode *)container_of(lprops - pos, struct ubifs_pnode, lprops[0]); - return !test_bit(COW_ZNODE, &pnode->flags) && + return !test_bit(COW_CNODE, &pnode->flags) && test_bit(DIRTY_CNODE, &pnode->flags); } @@ -518,7 +514,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) * @free: new free space amount * @dirty: new dirty space amount * @flags: new flags - * @idx_gc_cnt: change to the count of idx_gc list + * @idx_gc_cnt: change to the count of @idx_gc list * * This function changes LEB properties (@free, @dirty or @flag). However, the * property which has the %LPROPS_NC value is not changed. Returns a pointer to @@ -535,7 +531,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, { /* * This is the only function that is allowed to change lprops, so we - * discard the const qualifier. + * discard the "const" qualifier. */ struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; @@ -575,7 +571,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, if (old_spc < c->dead_wm) c->lst.total_dead -= old_spc; else - c->lst.total_dark -= calc_dark(c, old_spc); + c->lst.total_dark -= ubifs_calc_dark(c, old_spc); c->lst.total_used -= c->leb_size - old_spc; } @@ -616,7 +612,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, if (new_spc < c->dead_wm) c->lst.total_dead += new_spc; else - c->lst.total_dark += calc_dark(c, new_spc); + c->lst.total_dark += ubifs_calc_dark(c, new_spc); c->lst.total_used += c->leb_size - new_spc; } @@ -678,6 +674,9 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, out: ubifs_release_lprops(c); + if (err) + ubifs_err("cannot change properties of LEB %d, error %d", + lnum, err); return err; } @@ -714,6 +713,9 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, out: ubifs_release_lprops(c); + if (err) + ubifs_err("cannot update properties of LEB %d, error %d", + lnum, err); return err; } @@ -737,6 +739,8 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) lpp = ubifs_lpt_lookup(c, lnum); if (IS_ERR(lpp)) { err = PTR_ERR(lpp); + ubifs_err("cannot read properties of LEB %d, error %d", + lnum, err); goto out; } @@ -840,3 +844,471 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c) ubifs_assert(lprops->free + lprops->dirty == c->leb_size); return lprops; } + +/* + * Everything below is related to debugging. + */ + +/** + * dbg_check_cats - check category heaps and lists. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_cats(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct list_head *pos; + int i, cat; + + if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) + return 0; + + list_for_each_entry(lprops, &c->empty_list, list) { + if (lprops->free != c->leb_size) { + ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); + return -EINVAL; + } + } + + i = 0; + list_for_each_entry(lprops, &c->freeable_list, list) { + if (lprops->free + lprops->dirty != c->leb_size) { + ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); + return -EINVAL; + } + i += 1; + } + if (i != c->freeable_cnt) { + ubifs_err("freeable list count %d expected %d", i, + c->freeable_cnt); + return -EINVAL; + } + + i = 0; + list_for_each(pos, &c->idx_gc) + i += 1; + if (i != c->idx_gc_cnt) { + ubifs_err("idx_gc list count %d expected %d", i, + c->idx_gc_cnt); + return -EINVAL; + } + + list_for_each_entry(lprops, &c->frdi_idx_list, list) { + if (lprops->free + lprops->dirty != c->leb_size) { + ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); + return -EINVAL; + } + if (!(lprops->flags & LPROPS_INDEX)) { + ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); + return -EINVAL; + } + } + + for (cat = 1; cat <= LPROPS_HEAP_CNT; cat++) { + struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; + + for (i = 0; i < heap->cnt; i++) { + lprops = heap->arr[i]; + if (!lprops) { + ubifs_err("null ptr in LPT heap cat %d", cat); + return -EINVAL; + } + if (lprops->hpos != i) { + ubifs_err("bad ptr in LPT heap cat %d", cat); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB in LPT heap cat %d", cat); + return -EINVAL; + } + } + } + + return 0; +} + +void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + int add_pos) +{ + int i = 0, j, err = 0; + + if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) + return; + + for (i = 0; i < heap->cnt; i++) { + struct ubifs_lprops *lprops = heap->arr[i]; + struct ubifs_lprops *lp; + + if (i != add_pos) + if ((lprops->flags & LPROPS_CAT_MASK) != cat) { + err = 1; + goto out; + } + if (lprops->hpos != i) { + err = 2; + goto out; + } + lp = ubifs_lpt_lookup(c, lprops->lnum); + if (IS_ERR(lp)) { + err = 3; + goto out; + } + if (lprops != lp) { + ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", + (size_t)lprops, (size_t)lp, lprops->lnum, + lp->lnum); + err = 4; + goto out; + } + for (j = 0; j < i; j++) { + lp = heap->arr[j]; + if (lp == lprops) { + err = 5; + goto out; + } + if (lp->lnum == lprops->lnum) { + err = 6; + goto out; + } + } + } +out: + if (err) { + ubifs_err("failed cat %d hpos %d err %d", cat, i, err); + dump_stack(); + ubifs_dump_heap(c, heap, cat); + } +} + +/** + * scan_check_cb - scan callback. + * @c: the UBIFS file-system description object + * @lp: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @lst: lprops statistics to update + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_check_cb(struct ubifs_info *c, + const struct ubifs_lprops *lp, int in_tree, + struct ubifs_lp_stats *lst) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + int cat, lnum = lp->lnum, is_idx = 0, used = 0, freef, dirty, ret; + void *buf = NULL; + + cat = lp->flags & LPROPS_CAT_MASK; + if (cat != LPROPS_UNCAT) { + cat = ubifs_categorize_lprops(c, lp); + if (cat != (lp->flags & LPROPS_CAT_MASK)) { + ubifs_err("bad LEB category %d expected %d", + (lp->flags & LPROPS_CAT_MASK), cat); + return -EINVAL; + } + } + + /* Check lp is on its category list (if it has one) */ + if (in_tree) { + struct list_head *list = NULL; + + switch (cat) { + case LPROPS_EMPTY: + list = &c->empty_list; + break; + case LPROPS_FREEABLE: + list = &c->freeable_list; + break; + case LPROPS_FRDI_IDX: + list = &c->frdi_idx_list; + break; + case LPROPS_UNCAT: + list = &c->uncat_list; + break; + } + if (list) { + struct ubifs_lprops *lprops; + int found = 0; + + list_for_each_entry(lprops, list, list) { + if (lprops == lp) { + found = 1; + break; + } + } + if (!found) { + ubifs_err("bad LPT list (category %d)", cat); + return -EINVAL; + } + } + } + + /* Check lp is on its category heap (if it has one) */ + if (in_tree && cat > 0 && cat <= LPROPS_HEAP_CNT) { + struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; + + if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || + lp != heap->arr[lp->hpos]) { + ubifs_err("bad LPT heap (category %d)", cat); + return -EINVAL; + } + } + + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) + return -ENOMEM; + + /* + * After an unclean unmount, empty and freeable LEBs + * may contain garbage - do not scan them. + */ + if (lp->free == c->leb_size) { + lst->empty_lebs += 1; + lst->total_free += c->leb_size; + lst->total_dark += ubifs_calc_dark(c, c->leb_size); + return LPT_SCAN_CONTINUE; + } + if (lp->free + lp->dirty == c->leb_size && + !(lp->flags & LPROPS_INDEX)) { + lst->total_free += lp->free; + lst->total_dirty += lp->dirty; + lst->total_dark += ubifs_calc_dark(c, c->leb_size); + return LPT_SCAN_CONTINUE; + } + + sleb = ubifs_scan(c, lnum, 0, buf, 0); + if (IS_ERR(sleb)) { + ret = PTR_ERR(sleb); + if (ret == -EUCLEAN) { + ubifs_dump_lprops(c); + ubifs_dump_budg(c, &c->bi); + } + goto out; + } + + is_idx = -1; + list_for_each_entry(snod, &sleb->nodes, list) { + int found, level = 0; + + cond_resched(); + + if (is_idx == -1) + is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0; + + if (is_idx && snod->type != UBIFS_IDX_NODE) { + ubifs_err("indexing node in data LEB %d:%d", + lnum, snod->offs); + goto out_destroy; + } + + if (snod->type == UBIFS_IDX_NODE) { + struct ubifs_idx_node *idx = snod->node; + + key_read(c, ubifs_idx_key(c, idx), &snod->key); + level = le16_to_cpu(idx->level); + } + + found = ubifs_tnc_has_node(c, &snod->key, level, lnum, + snod->offs, is_idx); + if (found) { + if (found < 0) + goto out_destroy; + used += ALIGN(snod->len, 8); + } + } + + freef = c->leb_size - sleb->endpt; + dirty = sleb->endpt - used; + + if (freef > c->leb_size || freef < 0 || dirty > c->leb_size || + dirty < 0) { + ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d", + lnum, freef, dirty); + goto out_destroy; + } + + if (lp->free + lp->dirty == c->leb_size && + freef + dirty == c->leb_size) + if ((is_idx && !(lp->flags & LPROPS_INDEX)) || + (!is_idx && freef == c->leb_size) || + lp->free == c->leb_size) { + /* + * Empty or freeable LEBs could contain index + * nodes from an uncompleted commit due to an + * unclean unmount. Or they could be empty for + * the same reason. Or it may simply not have been + * unmapped. + */ + freef = lp->free; + dirty = lp->dirty; + is_idx = 0; + } + + if (is_idx && lp->free + lp->dirty == freef + dirty && + lnum != c->ihead_lnum) { + /* + * After an unclean unmount, an index LEB could have a different + * amount of free space than the value recorded by lprops. That + * is because the in-the-gaps method may use free space or + * create free space (as a side-effect of using ubi_leb_change + * and not writing the whole LEB). The incorrect free space + * value is not a problem because the index is only ever + * allocated empty LEBs, so there will never be an attempt to + * write to the free space at the end of an index LEB - except + * by the in-the-gaps method for which it is not a problem. + */ + freef = lp->free; + dirty = lp->dirty; + } + + if (lp->free != freef || lp->dirty != dirty) + goto out_print; + + if (is_idx && !(lp->flags & LPROPS_INDEX)) { + if (freef == c->leb_size) + /* Free but not unmapped LEB, it's fine */ + is_idx = 0; + else { + ubifs_err("indexing node without indexing flag"); + goto out_print; + } + } + + if (!is_idx && (lp->flags & LPROPS_INDEX)) { + ubifs_err("data node with indexing flag"); + goto out_print; + } + + if (freef == c->leb_size) + lst->empty_lebs += 1; + + if (is_idx) + lst->idx_lebs += 1; + + if (!(lp->flags & LPROPS_INDEX)) + lst->total_used += c->leb_size - freef - dirty; + lst->total_free += freef; + lst->total_dirty += dirty; + + if (!(lp->flags & LPROPS_INDEX)) { + int spc = freef + dirty; + + if (spc < c->dead_wm) + lst->total_dead += spc; + else + lst->total_dark += ubifs_calc_dark(c, spc); + } + + ubifs_scan_destroy(sleb); + vfree(buf); + return LPT_SCAN_CONTINUE; + +out_print: + ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d", + lnum, lp->free, lp->dirty, lp->flags, freef, dirty); + ubifs_dump_leb(c, lnum); +out_destroy: + ubifs_scan_destroy(sleb); + ret = -EINVAL; +out: + vfree(buf); + return ret; +} + +/** + * dbg_check_lprops - check all LEB properties. + * @c: UBIFS file-system description object + * + * This function checks all LEB properties and makes sure they are all correct. + * It returns zero if everything is fine, %-EINVAL if there is an inconsistency + * and other negative error codes in case of other errors. This function is + * called while the file system is locked (because of commit start), so no + * additional locking is required. Note that locking the LPT mutex would cause + * a circular lock dependency with the TNC mutex. + */ +int dbg_check_lprops(struct ubifs_info *c) +{ + int i, err; + struct ubifs_lp_stats lst; + + if (!dbg_is_chk_lprops(c)) + return 0; + + /* + * As we are going to scan the media, the write buffers have to be + * synchronized. + */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + return err; + } + + memset(&lst, 0, sizeof(struct ubifs_lp_stats)); + err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, + (ubifs_lpt_scan_callback)scan_check_cb, + &lst); + if (err && err != -ENOSPC) + goto out; + + if (lst.empty_lebs != c->lst.empty_lebs || + lst.idx_lebs != c->lst.idx_lebs || + lst.total_free != c->lst.total_free || + lst.total_dirty != c->lst.total_dirty || + lst.total_used != c->lst.total_used) { + ubifs_err("bad overall accounting"); + ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", + lst.empty_lebs, lst.idx_lebs, lst.total_free, + lst.total_dirty, lst.total_used); + ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", + c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, + c->lst.total_dirty, c->lst.total_used); + err = -EINVAL; + goto out; + } + + if (lst.total_dead != c->lst.total_dead || + lst.total_dark != c->lst.total_dark) { + ubifs_err("bad dead/dark space accounting"); + ubifs_err("calculated: total_dead %lld, total_dark %lld", + lst.total_dead, lst.total_dark); + ubifs_err("read from lprops: total_dead %lld, total_dark %lld", + c->lst.total_dead, c->lst.total_dark); + err = -EINVAL; + goto out; + } + + err = dbg_check_cats(c); +out: + return err; +} diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 1a50d4c..c49d3b0 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) @@ -44,8 +33,17 @@ */ #include "ubifs.h" -#include "crc16.h" +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/crc16.h> #include <linux/math64.h> +#include <linux/slab.h> +#else +#include <linux/compat.h> +#include <linux/err.h> +#include <ubi_uboot.h> +#include "crc16.h" +#endif /** * do_calc_lpt_geom - calculate sizes for the LPT area. @@ -159,6 +157,119 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c) } /** + * calc_dflt_lpt_geom - calculate default LPT geometry. + * @c: the UBIFS file-system description object + * @main_lebs: number of main area LEBs is passed and returned here + * @big_lpt: whether the LPT area is "big" is returned here + * + * The size of the LPT area depends on parameters that themselves are dependent + * on the size of the LPT area. This function, successively recalculates the LPT + * area geometry until the parameters and resultant geometry are consistent. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, + int *big_lpt) +{ + int i, lebs_needed; + long long sz; + + /* Start by assuming the minimum number of LPT LEBs */ + c->lpt_lebs = UBIFS_MIN_LPT_LEBS; + c->main_lebs = *main_lebs - c->lpt_lebs; + if (c->main_lebs <= 0) + return -EINVAL; + + /* And assume we will use the small LPT model */ + c->big_lpt = 0; + + /* + * Calculate the geometry based on assumptions above and then see if it + * makes sense + */ + do_calc_lpt_geom(c); + + /* Small LPT model must have lpt_sz < leb_size */ + if (c->lpt_sz > c->leb_size) { + /* Nope, so try again using big LPT model */ + c->big_lpt = 1; + do_calc_lpt_geom(c); + } + + /* Now check there are enough LPT LEBs */ + for (i = 0; i < 64 ; i++) { + sz = c->lpt_sz * 4; /* Allow 4 times the size */ + lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); + if (lebs_needed > c->lpt_lebs) { + /* Not enough LPT LEBs so try again with more */ + c->lpt_lebs = lebs_needed; + c->main_lebs = *main_lebs - c->lpt_lebs; + if (c->main_lebs <= 0) + return -EINVAL; + do_calc_lpt_geom(c); + continue; + } + if (c->ltab_sz > c->leb_size) { + ubifs_err("LPT ltab too big"); + return -EINVAL; + } + *main_lebs = c->main_lebs; + *big_lpt = c->big_lpt; + return 0; + } + return -EINVAL; +} + +/** + * pack_bits - pack bit fields end-to-end. + * @addr: address at which to pack (passed and next address returned) + * @pos: bit position at which to pack (passed and next position returned) + * @val: value to pack + * @nrbits: number of bits of value to pack (1-32) + */ +static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits) +{ + uint8_t *p = *addr; + int b = *pos; + + ubifs_assert(nrbits > 0); + ubifs_assert(nrbits <= 32); + ubifs_assert(*pos >= 0); + ubifs_assert(*pos < 8); + ubifs_assert((val >> nrbits) == 0 || nrbits == 32); + if (b) { + *p |= ((uint8_t)val) << b; + nrbits += b; + if (nrbits > 8) { + *++p = (uint8_t)(val >>= (8 - b)); + if (nrbits > 16) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 24) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 32) + *++p = (uint8_t)(val >>= 8); + } + } + } + } else { + *p = (uint8_t)val; + if (nrbits > 8) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 16) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 24) + *++p = (uint8_t)(val >>= 8); + } + } + } + b = nrbits & 7; + if (b == 0) + p++; + *addr = p; + *pos = b; +} + +/** * ubifs_unpack_bits - unpack bit fields. * @addr: address at which to unpack (passed and next address returned) * @pos: bit position at which to unpack (passed and next position returned) @@ -228,6 +339,118 @@ uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits) } /** + * ubifs_pack_pnode - pack all the bit fields of a pnode. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @pnode: pnode to pack + */ +void ubifs_pack_pnode(struct ubifs_info *c, void *buf, + struct ubifs_pnode *pnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS); + if (c->big_lpt) + pack_bits(&addr, &pos, pnode->num, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + pack_bits(&addr, &pos, pnode->lprops[i].free >> 3, + c->space_bits); + pack_bits(&addr, &pos, pnode->lprops[i].dirty >> 3, + c->space_bits); + if (pnode->lprops[i].flags & LPROPS_INDEX) + pack_bits(&addr, &pos, 1, 1); + else + pack_bits(&addr, &pos, 0, 1); + } + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->pnode_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_pack_nnode - pack all the bit fields of a nnode. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @nnode: nnode to pack + */ +void ubifs_pack_nnode(struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS); + if (c->big_lpt) + pack_bits(&addr, &pos, nnode->num, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int lnum = nnode->nbranch[i].lnum; + + if (lnum == 0) + lnum = c->lpt_last + 1; + pack_bits(&addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits); + pack_bits(&addr, &pos, nnode->nbranch[i].offs, + c->lpt_offs_bits); + } + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->nnode_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_pack_ltab - pack the LPT's own lprops table. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @ltab: LPT's own lprops table to pack + */ +void ubifs_pack_ltab(struct ubifs_info *c, void *buf, + struct ubifs_lpt_lprops *ltab) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS); + for (i = 0; i < c->lpt_lebs; i++) { + pack_bits(&addr, &pos, ltab[i].free, c->lpt_spc_bits); + pack_bits(&addr, &pos, ltab[i].dirty, c->lpt_spc_bits); + } + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->ltab_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_pack_lsave - pack the LPT's save table. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @lsave: LPT's save table to pack + */ +void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS); + for (i = 0; i < c->lsave_cnt; i++) + pack_bits(&addr, &pos, lsave[i], c->lnum_bits); + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->lsave_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties. * @c: UBIFS file-system description object * @lnum: LEB number to which to add dirty space @@ -244,6 +467,23 @@ void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty) } /** + * set_ltab - set LPT LEB properties. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @free: amount of free space + * @dirty: amount of dirty space + */ +static void set_ltab(struct ubifs_info *c, int lnum, int free, int dirty) +{ + dbg_lp("LEB %d free %d dirty %d to %d %d", + lnum, c->ltab[lnum - c->lpt_first].free, + c->ltab[lnum - c->lpt_first].dirty, free, dirty); + ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); + c->ltab[lnum - c->lpt_first].free = free; + c->ltab[lnum - c->lpt_first].dirty = dirty; +} + +/** * ubifs_add_nnode_dirt - add dirty space to LPT LEB properties. * @c: UBIFS file-system description object * @nnode: nnode for which to add dirt @@ -276,6 +516,31 @@ static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) } /** + * calc_nnode_num - calculate nnode number. + * @row: the row in the tree (root is zero) + * @col: the column in the row (leftmost is zero) + * + * The nnode number is a number that uniquely identifies a nnode and can be used + * easily to traverse the tree from the root to that nnode. + * + * This function calculates and returns the nnode number for the nnode at @row + * and @col. + */ +static int calc_nnode_num(int row, int col) +{ + int num, bits; + + num = 1; + while (row--) { + bits = (col & (UBIFS_LPT_FANOUT - 1)); + col >>= UBIFS_LPT_FANOUT_SHIFT; + num <<= UBIFS_LPT_FANOUT_SHIFT; + num |= bits; + } + return num; +} + +/** * calc_nnode_num_from_parent - calculate nnode number. * @c: UBIFS file-system description object * @parent: parent nnode @@ -328,6 +593,269 @@ static int calc_pnode_num_from_parent(const struct ubifs_info *c, } /** + * ubifs_create_dflt_lpt - create default LPT. + * @c: UBIFS file-system description object + * @main_lebs: number of main area LEBs is passed and returned here + * @lpt_first: LEB number of first LPT LEB + * @lpt_lebs: number of LEBs for LPT is passed and returned here + * @big_lpt: use big LPT model is passed and returned here + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + int *lpt_lebs, int *big_lpt) +{ + int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row; + int blnum, boffs, bsz, bcnt; + struct ubifs_pnode *pnode = NULL; + struct ubifs_nnode *nnode = NULL; + void *buf = NULL, *p; + struct ubifs_lpt_lprops *ltab = NULL; + int *lsave = NULL; + + err = calc_dflt_lpt_geom(c, main_lebs, big_lpt); + if (err) + return err; + *lpt_lebs = c->lpt_lebs; + + /* Needed by 'ubifs_pack_nnode()' and 'set_ltab()' */ + c->lpt_first = lpt_first; + /* Needed by 'set_ltab()' */ + c->lpt_last = lpt_first + c->lpt_lebs - 1; + /* Needed by 'ubifs_pack_lsave()' */ + c->main_first = c->leb_cnt - *main_lebs; + + lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_KERNEL); + pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL); + nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL); + buf = vmalloc(c->leb_size); + ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + if (!pnode || !nnode || !buf || !ltab || !lsave) { + err = -ENOMEM; + goto out; + } + + ubifs_assert(!c->ltab); + c->ltab = ltab; /* Needed by set_ltab */ + + /* Initialize LPT's own lprops */ + for (i = 0; i < c->lpt_lebs; i++) { + ltab[i].free = c->leb_size; + ltab[i].dirty = 0; + ltab[i].tgc = 0; + ltab[i].cmt = 0; + } + + lnum = lpt_first; + p = buf; + /* Number of leaf nodes (pnodes) */ + cnt = c->pnode_cnt; + + /* + * The first pnode contains the LEB properties for the LEBs that contain + * the root inode node and the root index node of the index tree. + */ + node_sz = ALIGN(ubifs_idx_node_sz(c, 1), 8); + iopos = ALIGN(node_sz, c->min_io_size); + pnode->lprops[0].free = c->leb_size - iopos; + pnode->lprops[0].dirty = iopos - node_sz; + pnode->lprops[0].flags = LPROPS_INDEX; + + node_sz = UBIFS_INO_NODE_SZ; + iopos = ALIGN(node_sz, c->min_io_size); + pnode->lprops[1].free = c->leb_size - iopos; + pnode->lprops[1].dirty = iopos - node_sz; + + for (i = 2; i < UBIFS_LPT_FANOUT; i++) + pnode->lprops[i].free = c->leb_size; + + /* Add first pnode */ + ubifs_pack_pnode(c, p, pnode); + p += c->pnode_sz; + len = c->pnode_sz; + pnode->num += 1; + + /* Reset pnode values for remaining pnodes */ + pnode->lprops[0].free = c->leb_size; + pnode->lprops[0].dirty = 0; + pnode->lprops[0].flags = 0; + + pnode->lprops[1].free = c->leb_size; + pnode->lprops[1].dirty = 0; + + /* + * To calculate the internal node branches, we keep information about + * the level below. + */ + blnum = lnum; /* LEB number of level below */ + boffs = 0; /* Offset of level below */ + bcnt = cnt; /* Number of nodes in level below */ + bsz = c->pnode_sz; /* Size of nodes in level below */ + + /* Add all remaining pnodes */ + for (i = 1; i < cnt; i++) { + if (len + c->pnode_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); + err = ubifs_leb_change(c, lnum++, buf, alen); + if (err) + goto out; + p = buf; + len = 0; + } + ubifs_pack_pnode(c, p, pnode); + p += c->pnode_sz; + len += c->pnode_sz; + /* + * pnodes are simply numbered left to right starting at zero, + * which means the pnode number can be used easily to traverse + * down the tree to the corresponding pnode. + */ + pnode->num += 1; + } + + row = 0; + for (i = UBIFS_LPT_FANOUT; cnt > i; i <<= UBIFS_LPT_FANOUT_SHIFT) + row += 1; + /* Add all nnodes, one level at a time */ + while (1) { + /* Number of internal nodes (nnodes) at next level */ + cnt = DIV_ROUND_UP(cnt, UBIFS_LPT_FANOUT); + for (i = 0; i < cnt; i++) { + if (len + c->nnode_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, + alen - len); + memset(p, 0xff, alen - len); + err = ubifs_leb_change(c, lnum++, buf, alen); + if (err) + goto out; + p = buf; + len = 0; + } + /* Only 1 nnode at this level, so it is the root */ + if (cnt == 1) { + c->lpt_lnum = lnum; + c->lpt_offs = len; + } + /* Set branches to the level below */ + for (j = 0; j < UBIFS_LPT_FANOUT; j++) { + if (bcnt) { + if (boffs + bsz > c->leb_size) { + blnum += 1; + boffs = 0; + } + nnode->nbranch[j].lnum = blnum; + nnode->nbranch[j].offs = boffs; + boffs += bsz; + bcnt--; + } else { + nnode->nbranch[j].lnum = 0; + nnode->nbranch[j].offs = 0; + } + } + nnode->num = calc_nnode_num(row, i); + ubifs_pack_nnode(c, p, nnode); + p += c->nnode_sz; + len += c->nnode_sz; + } + /* Only 1 nnode at this level, so it is the root */ + if (cnt == 1) + break; + /* Update the information about the level below */ + bcnt = cnt; + bsz = c->nnode_sz; + row -= 1; + } + + if (*big_lpt) { + /* Need to add LPT's save table */ + if (len + c->lsave_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); + err = ubifs_leb_change(c, lnum++, buf, alen); + if (err) + goto out; + p = buf; + len = 0; + } + + c->lsave_lnum = lnum; + c->lsave_offs = len; + + for (i = 0; i < c->lsave_cnt && i < *main_lebs; i++) + lsave[i] = c->main_first + i; + for (; i < c->lsave_cnt; i++) + lsave[i] = c->main_first; + + ubifs_pack_lsave(c, p, lsave); + p += c->lsave_sz; + len += c->lsave_sz; + } + + /* Need to add LPT's own LEB properties table */ + if (len + c->ltab_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); + err = ubifs_leb_change(c, lnum++, buf, alen); + if (err) + goto out; + p = buf; + len = 0; + } + + c->ltab_lnum = lnum; + c->ltab_offs = len; + + /* Update ltab before packing it */ + len += c->ltab_sz; + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + + ubifs_pack_ltab(c, p, ltab); + p += c->ltab_sz; + + /* Write remaining buffer */ + memset(p, 0xff, alen - len); + err = ubifs_leb_change(c, lnum, buf, alen); + if (err) + goto out; + + c->nhead_lnum = lnum; + c->nhead_offs = ALIGN(len, c->min_io_size); + + dbg_lp("space_bits %d", c->space_bits); + dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits); + dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits); + dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits); + dbg_lp("pcnt_bits %d", c->pcnt_bits); + dbg_lp("lnum_bits %d", c->lnum_bits); + dbg_lp("pnode_sz %d", c->pnode_sz); + dbg_lp("nnode_sz %d", c->nnode_sz); + dbg_lp("ltab_sz %d", c->ltab_sz); + dbg_lp("lsave_sz %d", c->lsave_sz); + dbg_lp("lsave_cnt %d", c->lsave_cnt); + dbg_lp("lpt_hght %d", c->lpt_hght); + dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); + dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); + dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) + dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); +out: + c->ltab = NULL; + kfree(lsave); + vfree(ltab); + vfree(buf); + kfree(nnode); + kfree(pnode); + return err; +} + +/** * update_cats - add LEB properties of a pnode to LEB category lists and heaps. * @c: UBIFS file-system description object * @pnode: pnode @@ -392,7 +920,7 @@ static int check_lpt_crc(void *buf, int len) if (crc != calc_crc) { ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, calc_crc); - dbg_dump_stack(); + dump_stack(); return -EINVAL; } return 0; @@ -415,7 +943,7 @@ static int check_lpt_type(uint8_t **addr, int *pos, int type) if (node_type != type) { ubifs_err("invalid type (%d) in LPT node type %d", node_type, type); - dbg_dump_stack(); + dump_stack(); return -EINVAL; } return 0; @@ -524,6 +1052,34 @@ static int unpack_ltab(const struct ubifs_info *c, void *buf) return err; } +#ifndef __UBOOT__ +/** + * unpack_lsave - unpack the LPT's save table. + * @c: UBIFS file-system description object + * @buf: buffer from which to unpack + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_lsave(const struct ubifs_info *c, void *buf) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE); + if (err) + return err; + for (i = 0; i < c->lsave_cnt; i++) { + int lnum = ubifs_unpack_bits(&addr, &pos, c->lnum_bits); + + if (lnum < c->main_first || lnum >= c->leb_cnt) + return -EINVAL; + c->lsave[i] = lnum; + } + err = check_lpt_crc(buf, c->lsave_sz); + return err; +} +#endif + /** * validate_nnode - validate a nnode. * @c: UBIFS file-system description object @@ -662,7 +1218,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); + err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1); if (err) goto out; err = ubifs_unpack_nnode(c, buf, nnode); @@ -687,6 +1243,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); + dump_stack(); kfree(nnode); return err; } @@ -710,10 +1267,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) lnum = branch->lnum; offs = branch->offs; pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); - if (!pnode) { - err = -ENOMEM; - goto out; - } + if (!pnode) + return -ENOMEM; + if (lnum == 0) { /* * This pnode was not written which just means that the LEB @@ -731,7 +1287,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) lprops->flags = ubifs_categorize_lprops(c, lprops); } } else { - err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); + err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1); if (err) goto out; err = unpack_pnode(c, buf, pnode); @@ -752,8 +1308,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); - dbg_dump_pnode(c, pnode, parent, iip); - dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); + ubifs_dump_pnode(c, pnode, parent, iip); + dump_stack(); + ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); kfree(pnode); return err; } @@ -772,7 +1329,7 @@ static int read_ltab(struct ubifs_info *c) buf = vmalloc(c->ltab_sz); if (!buf) return -ENOMEM; - err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); + err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1); if (err) goto out; err = unpack_ltab(c, buf); @@ -781,6 +1338,50 @@ out: return err; } +#ifndef __UBOOT__ +/** + * read_lsave - read LPT's save table. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_lsave(struct ubifs_info *c) +{ + int err, i; + void *buf; + + buf = vmalloc(c->lsave_sz); + if (!buf) + return -ENOMEM; + err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs, + c->lsave_sz, 1); + if (err) + goto out; + err = unpack_lsave(c, buf); + if (err) + goto out; + for (i = 0; i < c->lsave_cnt; i++) { + int lnum = c->lsave[i]; + struct ubifs_lprops *lprops; + + /* + * Due to automatic resizing, the values in the lsave table + * could be beyond the volume size - just ignore them. + */ + if (lnum >= c->leb_cnt) + continue; + lprops = ubifs_lpt_lookup(c, lnum); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + } +out: + vfree(buf); + return err; +} +#endif + /** * ubifs_get_nnode - get a nnode. * @c: UBIFS file-system description object @@ -861,13 +1462,13 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) shft -= UBIFS_LPT_FANOUT_SHIFT; nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); shft -= UBIFS_LPT_FANOUT_SHIFT; pnode = ubifs_get_pnode(c, nnode, iip); if (IS_ERR(pnode)) - return ERR_PTR(PTR_ERR(pnode)); + return ERR_CAST(pnode); iip = (i & (UBIFS_LPT_FANOUT - 1)); dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, pnode->lprops[iip].free, pnode->lprops[iip].dirty, @@ -990,7 +1591,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) nnode = c->nroot; nnode = dirty_cow_nnode(c, nnode); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); i = lnum - c->main_first; shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; for (h = 1; h < c->lpt_hght; h++) { @@ -998,19 +1599,19 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) shft -= UBIFS_LPT_FANOUT_SHIFT; nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); nnode = dirty_cow_nnode(c, nnode); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); shft -= UBIFS_LPT_FANOUT_SHIFT; pnode = ubifs_get_pnode(c, nnode, iip); if (IS_ERR(pnode)) - return ERR_PTR(PTR_ERR(pnode)); + return ERR_CAST(pnode); pnode = dirty_cow_pnode(c, pnode); if (IS_ERR(pnode)) - return ERR_PTR(PTR_ERR(pnode)); + return ERR_CAST(pnode); iip = (i & (UBIFS_LPT_FANOUT - 1)); dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, pnode->lprops[iip].free, pnode->lprops[iip].dirty, @@ -1079,6 +1680,47 @@ static int lpt_init_rd(struct ubifs_info *c) return 0; } +#ifndef __UBOOT__ +/** + * lpt_init_wr - initialize the LPT for writing. + * @c: UBIFS file-system description object + * + * 'lpt_init_rd()' must have been called already. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int lpt_init_wr(struct ubifs_info *c) +{ + int err, i; + + c->ltab_cmt = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + if (!c->ltab_cmt) + return -ENOMEM; + + c->lpt_buf = vmalloc(c->leb_size); + if (!c->lpt_buf) + return -ENOMEM; + + if (c->big_lpt) { + c->lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_NOFS); + if (!c->lsave) + return -ENOMEM; + err = read_lsave(c); + if (err) + return err; + } + + for (i = 0; i < c->lpt_lebs; i++) + if (c->ltab[i].free == c->leb_size) { + err = ubifs_leb_unmap(c, i + c->lpt_first); + if (err) + return err; + } + + return 0; +} +#endif + /** * ubifs_lpt_init - initialize the LPT. * @c: UBIFS file-system description object @@ -1098,8 +1740,546 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) if (rd) { err = lpt_init_rd(c); if (err) + goto out_err; + } + +#ifndef __UBOOT__ + if (wr) { + err = lpt_init_wr(c); + if (err) + goto out_err; + } +#endif + + return 0; + +out_err: +#ifndef __UBOOT__ + if (wr) + ubifs_lpt_free(c, 1); +#endif + if (rd) + ubifs_lpt_free(c, 0); + return err; +} + +/** + * struct lpt_scan_node - somewhere to put nodes while we scan LPT. + * @nnode: where to keep a nnode + * @pnode: where to keep a pnode + * @cnode: where to keep a cnode + * @in_tree: is the node in the tree in memory + * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in + * the tree + * @ptr.pnode: ditto for pnode + * @ptr.cnode: ditto for cnode + */ +struct lpt_scan_node { + union { + struct ubifs_nnode nnode; + struct ubifs_pnode pnode; + struct ubifs_cnode cnode; + }; + int in_tree; + union { + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + struct ubifs_cnode *cnode; + } ptr; +}; + +/** + * scan_get_nnode - for the scan, get a nnode from either the tree or flash. + * @c: the UBIFS file-system description object + * @path: where to put the nnode + * @parent: parent of the nnode + * @iip: index in parent of the nnode + * + * This function returns a pointer to the nnode on success or a negative error + * code on failure. + */ +static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, + struct lpt_scan_node *path, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_nnode *nnode; + void *buf = c->lpt_nod_buf; + int err; + + branch = &parent->nbranch[iip]; + nnode = branch->nnode; + if (nnode) { + path->in_tree = 1; + path->ptr.nnode = nnode; + return nnode; + } + nnode = &path->nnode; + path->in_tree = 0; + path->ptr.nnode = nnode; + memset(nnode, 0, sizeof(struct ubifs_nnode)); + if (branch->lnum == 0) { + /* + * This nnode was not written which just means that the LEB + * properties in the subtree below it describe empty LEBs. We + * make the nnode as though we had read it, which in fact means + * doing almost nothing. + */ + if (c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + } else { + err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, + c->nnode_sz, 1); + if (err) + return ERR_PTR(err); + err = ubifs_unpack_nnode(c, buf, nnode); + if (err) + return ERR_PTR(err); + } + err = validate_nnode(c, nnode, parent, iip); + if (err) + return ERR_PTR(err); + if (!c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + nnode->level = parent->level - 1; + nnode->parent = parent; + nnode->iip = iip; + return nnode; +} + +/** + * scan_get_pnode - for the scan, get a pnode from either the tree or flash. + * @c: the UBIFS file-system description object + * @path: where to put the pnode + * @parent: parent of the pnode + * @iip: index in parent of the pnode + * + * This function returns a pointer to the pnode on success or a negative error + * code on failure. + */ +static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, + struct lpt_scan_node *path, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_pnode *pnode; + void *buf = c->lpt_nod_buf; + int err; + + branch = &parent->nbranch[iip]; + pnode = branch->pnode; + if (pnode) { + path->in_tree = 1; + path->ptr.pnode = pnode; + return pnode; + } + pnode = &path->pnode; + path->in_tree = 0; + path->ptr.pnode = pnode; + memset(pnode, 0, sizeof(struct ubifs_pnode)); + if (branch->lnum == 0) { + /* + * This pnode was not written which just means that the LEB + * properties in it describe empty LEBs. We make the pnode as + * though we had read it. + */ + int i; + + if (c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops * const lprops = &pnode->lprops[i]; + + lprops->free = c->leb_size; + lprops->flags = ubifs_categorize_lprops(c, lprops); + } + } else { + ubifs_assert(branch->lnum >= c->lpt_first && + branch->lnum <= c->lpt_last); + ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); + err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, + c->pnode_sz, 1); + if (err) + return ERR_PTR(err); + err = unpack_pnode(c, buf, pnode); + if (err) + return ERR_PTR(err); + } + err = validate_pnode(c, pnode, parent, iip); + if (err) + return ERR_PTR(err); + if (!c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + pnode->parent = parent; + pnode->iip = iip; + set_pnode_lnum(c, pnode); + return pnode; +} + +/** + * ubifs_lpt_scan_nolock - scan the LPT. + * @c: the UBIFS file-system description object + * @start_lnum: LEB number from which to start scanning + * @end_lnum: LEB number at which to stop scanning + * @scan_cb: callback function called for each lprops + * @data: data to be passed to the callback function + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, + ubifs_lpt_scan_callback scan_cb, void *data) +{ + int err = 0, i, h, iip, shft; + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + struct lpt_scan_node *path; + + if (start_lnum == -1) { + start_lnum = end_lnum + 1; + if (start_lnum >= c->leb_cnt) + start_lnum = c->main_first; + } + + ubifs_assert(start_lnum >= c->main_first && start_lnum < c->leb_cnt); + ubifs_assert(end_lnum >= c->main_first && end_lnum < c->leb_cnt); + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) return err; } + path = kmalloc(sizeof(struct lpt_scan_node) * (c->lpt_hght + 1), + GFP_NOFS); + if (!path) + return -ENOMEM; + + path[0].ptr.nnode = c->nroot; + path[0].in_tree = 1; +again: + /* Descend to the pnode containing start_lnum */ + nnode = c->nroot; + i = start_lnum - c->main_first; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = scan_get_nnode(c, path + h, nnode, iip); + if (IS_ERR(nnode)) { + err = PTR_ERR(nnode); + goto out; + } + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = scan_get_pnode(c, path + h, nnode, iip); + if (IS_ERR(pnode)) { + err = PTR_ERR(pnode); + goto out; + } + iip = (i & (UBIFS_LPT_FANOUT - 1)); + + /* Loop for each lprops */ + while (1) { + struct ubifs_lprops *lprops = &pnode->lprops[iip]; + int ret, lnum = lprops->lnum; + + ret = scan_cb(c, lprops, path[h].in_tree, data); + if (ret < 0) { + err = ret; + goto out; + } + if (ret & LPT_SCAN_ADD) { + /* Add all the nodes in path to the tree in memory */ + for (h = 1; h < c->lpt_hght; h++) { + const size_t sz = sizeof(struct ubifs_nnode); + struct ubifs_nnode *parent; + + if (path[h].in_tree) + continue; + nnode = kmemdup(&path[h].nnode, sz, GFP_NOFS); + if (!nnode) { + err = -ENOMEM; + goto out; + } + parent = nnode->parent; + parent->nbranch[nnode->iip].nnode = nnode; + path[h].ptr.nnode = nnode; + path[h].in_tree = 1; + path[h + 1].cnode.parent = nnode; + } + if (path[h].in_tree) + ubifs_ensure_cat(c, lprops); + else { + const size_t sz = sizeof(struct ubifs_pnode); + struct ubifs_nnode *parent; + + pnode = kmemdup(&path[h].pnode, sz, GFP_NOFS); + if (!pnode) { + err = -ENOMEM; + goto out; + } + parent = pnode->parent; + parent->nbranch[pnode->iip].pnode = pnode; + path[h].ptr.pnode = pnode; + path[h].in_tree = 1; + update_cats(c, pnode); + c->pnodes_have += 1; + } + err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *) + c->nroot, 0, 0); + if (err) + goto out; + err = dbg_check_cats(c); + if (err) + goto out; + } + if (ret & LPT_SCAN_STOP) { + err = 0; + break; + } + /* Get the next lprops */ + if (lnum == end_lnum) { + /* + * We got to the end without finding what we were + * looking for + */ + err = -ENOSPC; + goto out; + } + if (lnum + 1 >= c->leb_cnt) { + /* Wrap-around to the beginning */ + start_lnum = c->main_first; + goto again; + } + if (iip + 1 < UBIFS_LPT_FANOUT) { + /* Next lprops is in the same pnode */ + iip += 1; + continue; + } + /* We need to get the next pnode. Go up until we can go right */ + iip = pnode->iip; + while (1) { + h -= 1; + ubifs_assert(h >= 0); + nnode = path[h].ptr.nnode; + if (iip + 1 < UBIFS_LPT_FANOUT) + break; + iip = nnode->iip; + } + /* Go right */ + iip += 1; + /* Descend to the pnode */ + h += 1; + for (; h < c->lpt_hght; h++) { + nnode = scan_get_nnode(c, path + h, nnode, iip); + if (IS_ERR(nnode)) { + err = PTR_ERR(nnode); + goto out; + } + iip = 0; + } + pnode = scan_get_pnode(c, path + h, nnode, iip); + if (IS_ERR(pnode)) { + err = PTR_ERR(pnode); + goto out; + } + iip = 0; + } +out: + kfree(path); + return err; +} + +/** + * dbg_chk_pnode - check a pnode. + * @c: the UBIFS file-system description object + * @pnode: pnode to check + * @col: pnode column + * + * This function returns %0 on success and a negative error code on failure. + */ +static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + int col) +{ + int i; + + if (pnode->num != col) { + ubifs_err("pnode num %d expected %d parent num %d iip %d", + pnode->num, col, pnode->parent->num, pnode->iip); + return -EINVAL; + } + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops *lp, *lprops = &pnode->lprops[i]; + int lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + i + + c->main_first; + int found, cat = lprops->flags & LPROPS_CAT_MASK; + struct ubifs_lpt_heap *heap; + struct list_head *list = NULL; + + if (lnum >= c->leb_cnt) + continue; + if (lprops->lnum != lnum) { + ubifs_err("bad LEB number %d expected %d", + lprops->lnum, lnum); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + if (cat != LPROPS_UNCAT) { + ubifs_err("LEB %d taken but not uncat %d", + lprops->lnum, cat); + return -EINVAL; + } + continue; + } + if (lprops->flags & LPROPS_INDEX) { + switch (cat) { + case LPROPS_UNCAT: + case LPROPS_DIRTY_IDX: + case LPROPS_FRDI_IDX: + break; + default: + ubifs_err("LEB %d index but cat %d", + lprops->lnum, cat); + return -EINVAL; + } + } else { + switch (cat) { + case LPROPS_UNCAT: + case LPROPS_DIRTY: + case LPROPS_FREE: + case LPROPS_EMPTY: + case LPROPS_FREEABLE: + break; + default: + ubifs_err("LEB %d not index but cat %d", + lprops->lnum, cat); + return -EINVAL; + } + } + switch (cat) { + case LPROPS_UNCAT: + list = &c->uncat_list; + break; + case LPROPS_EMPTY: + list = &c->empty_list; + break; + case LPROPS_FREEABLE: + list = &c->freeable_list; + break; + case LPROPS_FRDI_IDX: + list = &c->frdi_idx_list; + break; + } + found = 0; + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + heap = &c->lpt_heap[cat - 1]; + if (lprops->hpos < heap->cnt && + heap->arr[lprops->hpos] == lprops) + found = 1; + break; + case LPROPS_UNCAT: + case LPROPS_EMPTY: + case LPROPS_FREEABLE: + case LPROPS_FRDI_IDX: + list_for_each_entry(lp, list, list) + if (lprops == lp) { + found = 1; + break; + } + break; + } + if (!found) { + ubifs_err("LEB %d cat %d not found in cat heap/list", + lprops->lnum, cat); + return -EINVAL; + } + switch (cat) { + case LPROPS_EMPTY: + if (lprops->free != c->leb_size) { + ubifs_err("LEB %d cat %d free %d dirty %d", + lprops->lnum, cat, lprops->free, + lprops->dirty); + return -EINVAL; + } + case LPROPS_FREEABLE: + case LPROPS_FRDI_IDX: + if (lprops->free + lprops->dirty != c->leb_size) { + ubifs_err("LEB %d cat %d free %d dirty %d", + lprops->lnum, cat, lprops->free, + lprops->dirty); + return -EINVAL; + } + } + } + return 0; +} + +/** + * dbg_check_lpt_nodes - check nnodes and pnodes. + * @c: the UBIFS file-system description object + * @cnode: next cnode (nnode or pnode) to check + * @row: row of cnode (root is zero) + * @col: column of cnode (leftmost is zero) + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + int row, int col) +{ + struct ubifs_nnode *nnode, *nn; + struct ubifs_cnode *cn; + int num, iip = 0, err; + + if (!dbg_is_chk_lprops(c)) + return 0; + + while (cnode) { + ubifs_assert(row >= 0); + nnode = cnode->parent; + if (cnode->level) { + /* cnode is a nnode */ + num = calc_nnode_num(row, col); + if (cnode->num != num) { + ubifs_err("nnode num %d expected %d parent num %d iip %d", + cnode->num, num, + (nnode ? nnode->num : 0), cnode->iip); + return -EINVAL; + } + nn = (struct ubifs_nnode *)cnode; + while (iip < UBIFS_LPT_FANOUT) { + cn = nn->nbranch[iip].cnode; + if (cn) { + /* Go down */ + row += 1; + col <<= UBIFS_LPT_FANOUT_SHIFT; + col += iip; + iip = 0; + cnode = cn; + break; + } + /* Go right */ + iip += 1; + } + if (iip < UBIFS_LPT_FANOUT) + continue; + } else { + struct ubifs_pnode *pnode; + + /* cnode is a pnode */ + pnode = (struct ubifs_pnode *)cnode; + err = dbg_chk_pnode(c, pnode, col); + if (err) + return err; + } + /* Go up and to the right */ + row -= 1; + col >>= UBIFS_LPT_FANOUT_SHIFT; + iip = cnode->iip + 1; + cnode = (struct ubifs_cnode *)nnode; + } return 0; } diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index c0af818..cad422e 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) @@ -25,9 +14,1284 @@ * subsystem. */ +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/crc16.h> +#include <linux/slab.h> +#include <linux/random.h> +#else +#include <linux/compat.h> +#include <linux/err.h> #include "crc16.h" +#endif #include "ubifs.h" +#ifndef __UBOOT__ +static int dbg_populate_lsave(struct ubifs_info *c); +#endif + +/** + * first_dirty_cnode - find first dirty cnode. + * @c: UBIFS file-system description object + * @nnode: nnode at which to start + * + * This function returns the first dirty cnode or %NULL if there is not one. + */ +static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode) +{ + ubifs_assert(nnode); + while (1) { + int i, cont = 0; + + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_cnode *cnode; + + cnode = nnode->nbranch[i].cnode; + if (cnode && + test_bit(DIRTY_CNODE, &cnode->flags)) { + if (cnode->level == 0) + return cnode; + nnode = (struct ubifs_nnode *)cnode; + cont = 1; + break; + } + } + if (!cont) + return (struct ubifs_cnode *)nnode; + } +} + +/** + * next_dirty_cnode - find next dirty cnode. + * @cnode: cnode from which to begin searching + * + * This function returns the next dirty cnode or %NULL if there is not one. + */ +static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode) +{ + struct ubifs_nnode *nnode; + int i; + + ubifs_assert(cnode); + nnode = cnode->parent; + if (!nnode) + return NULL; + for (i = cnode->iip + 1; i < UBIFS_LPT_FANOUT; i++) { + cnode = nnode->nbranch[i].cnode; + if (cnode && test_bit(DIRTY_CNODE, &cnode->flags)) { + if (cnode->level == 0) + return cnode; /* cnode is a pnode */ + /* cnode is a nnode */ + return first_dirty_cnode((struct ubifs_nnode *)cnode); + } + } + return (struct ubifs_cnode *)nnode; +} + +/** + * get_cnodes_to_commit - create list of dirty cnodes to commit. + * @c: UBIFS file-system description object + * + * This function returns the number of cnodes to commit. + */ +static int get_cnodes_to_commit(struct ubifs_info *c) +{ + struct ubifs_cnode *cnode, *cnext; + int cnt = 0; + + if (!c->nroot) + return 0; + + if (!test_bit(DIRTY_CNODE, &c->nroot->flags)) + return 0; + + c->lpt_cnext = first_dirty_cnode(c->nroot); + cnode = c->lpt_cnext; + if (!cnode) + return 0; + cnt += 1; + while (1) { + ubifs_assert(!test_bit(COW_CNODE, &cnode->flags)); + __set_bit(COW_CNODE, &cnode->flags); + cnext = next_dirty_cnode(cnode); + if (!cnext) { + cnode->cnext = c->lpt_cnext; + break; + } + cnode->cnext = cnext; + cnode = cnext; + cnt += 1; + } + dbg_cmt("committing %d cnodes", cnt); + dbg_lp("committing %d cnodes", cnt); + ubifs_assert(cnt == c->dirty_nn_cnt + c->dirty_pn_cnt); + return cnt; +} + +/** + * upd_ltab - update LPT LEB properties. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @free: amount of free space + * @dirty: amount of dirty space to add + */ +static void upd_ltab(struct ubifs_info *c, int lnum, int free, int dirty) +{ + dbg_lp("LEB %d free %d dirty %d to %d +%d", + lnum, c->ltab[lnum - c->lpt_first].free, + c->ltab[lnum - c->lpt_first].dirty, free, dirty); + ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); + c->ltab[lnum - c->lpt_first].free = free; + c->ltab[lnum - c->lpt_first].dirty += dirty; +} + +/** + * alloc_lpt_leb - allocate an LPT LEB that is empty. + * @c: UBIFS file-system description object + * @lnum: LEB number is passed and returned here + * + * This function finds the next empty LEB in the ltab starting from @lnum. If a + * an empty LEB is found it is returned in @lnum and the function returns %0. + * Otherwise the function returns -ENOSPC. Note however, that LPT is designed + * never to run out of space. + */ +static int alloc_lpt_leb(struct ubifs_info *c, int *lnum) +{ + int i, n; + + n = *lnum - c->lpt_first + 1; + for (i = n; i < c->lpt_lebs; i++) { + if (c->ltab[i].tgc || c->ltab[i].cmt) + continue; + if (c->ltab[i].free == c->leb_size) { + c->ltab[i].cmt = 1; + *lnum = i + c->lpt_first; + return 0; + } + } + + for (i = 0; i < n; i++) { + if (c->ltab[i].tgc || c->ltab[i].cmt) + continue; + if (c->ltab[i].free == c->leb_size) { + c->ltab[i].cmt = 1; + *lnum = i + c->lpt_first; + return 0; + } + } + return -ENOSPC; +} + +/** + * layout_cnodes - layout cnodes for commit. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int layout_cnodes(struct ubifs_info *c) +{ + int lnum, offs, len, alen, done_lsave, done_ltab, err; + struct ubifs_cnode *cnode; + + err = dbg_chk_lpt_sz(c, 0, 0); + if (err) + return err; + cnode = c->lpt_cnext; + if (!cnode) + return 0; + lnum = c->nhead_lnum; + offs = c->nhead_offs; + /* Try to place lsave and ltab nicely */ + done_lsave = !c->big_lpt; + done_ltab = 0; + if (!done_lsave && offs + c->lsave_sz <= c->leb_size) { + done_lsave = 1; + c->lsave_lnum = lnum; + c->lsave_offs = offs; + offs += c->lsave_sz; + dbg_chk_lpt_sz(c, 1, c->lsave_sz); + } + + if (offs + c->ltab_sz <= c->leb_size) { + done_ltab = 1; + c->ltab_lnum = lnum; + c->ltab_offs = offs; + offs += c->ltab_sz; + dbg_chk_lpt_sz(c, 1, c->ltab_sz); + } + + do { + if (cnode->level) { + len = c->nnode_sz; + c->dirty_nn_cnt -= 1; + } else { + len = c->pnode_sz; + c->dirty_pn_cnt -= 1; + } + while (offs + len > c->leb_size) { + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); + err = alloc_lpt_leb(c, &lnum); + if (err) + goto no_space; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + /* Try to place lsave and ltab nicely */ + if (!done_lsave) { + done_lsave = 1; + c->lsave_lnum = lnum; + c->lsave_offs = offs; + offs += c->lsave_sz; + dbg_chk_lpt_sz(c, 1, c->lsave_sz); + continue; + } + if (!done_ltab) { + done_ltab = 1; + c->ltab_lnum = lnum; + c->ltab_offs = offs; + offs += c->ltab_sz; + dbg_chk_lpt_sz(c, 1, c->ltab_sz); + continue; + } + break; + } + if (cnode->parent) { + cnode->parent->nbranch[cnode->iip].lnum = lnum; + cnode->parent->nbranch[cnode->iip].offs = offs; + } else { + c->lpt_lnum = lnum; + c->lpt_offs = offs; + } + offs += len; + dbg_chk_lpt_sz(c, 1, len); + cnode = cnode->cnext; + } while (cnode && cnode != c->lpt_cnext); + + /* Make sure to place LPT's save table */ + if (!done_lsave) { + if (offs + c->lsave_sz > c->leb_size) { + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); + err = alloc_lpt_leb(c, &lnum); + if (err) + goto no_space; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + } + done_lsave = 1; + c->lsave_lnum = lnum; + c->lsave_offs = offs; + offs += c->lsave_sz; + dbg_chk_lpt_sz(c, 1, c->lsave_sz); + } + + /* Make sure to place LPT's own lprops table */ + if (!done_ltab) { + if (offs + c->ltab_sz > c->leb_size) { + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); + err = alloc_lpt_leb(c, &lnum); + if (err) + goto no_space; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + } + done_ltab = 1; + c->ltab_lnum = lnum; + c->ltab_offs = offs; + offs += c->ltab_sz; + dbg_chk_lpt_sz(c, 1, c->ltab_sz); + } + + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + dbg_chk_lpt_sz(c, 4, alen - offs); + err = dbg_chk_lpt_sz(c, 3, alen); + if (err) + return err; + return 0; + +no_space: + ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", + lnum, offs, len, done_ltab, done_lsave); + ubifs_dump_lpt_info(c); + ubifs_dump_lpt_lebs(c); + dump_stack(); + return err; +} + +#ifndef __UBOOT__ +/** + * realloc_lpt_leb - allocate an LPT LEB that is empty. + * @c: UBIFS file-system description object + * @lnum: LEB number is passed and returned here + * + * This function duplicates exactly the results of the function alloc_lpt_leb. + * It is used during end commit to reallocate the same LEB numbers that were + * allocated by alloc_lpt_leb during start commit. + * + * This function finds the next LEB that was allocated by the alloc_lpt_leb + * function starting from @lnum. If a LEB is found it is returned in @lnum and + * the function returns %0. Otherwise the function returns -ENOSPC. + * Note however, that LPT is designed never to run out of space. + */ +static int realloc_lpt_leb(struct ubifs_info *c, int *lnum) +{ + int i, n; + + n = *lnum - c->lpt_first + 1; + for (i = n; i < c->lpt_lebs; i++) + if (c->ltab[i].cmt) { + c->ltab[i].cmt = 0; + *lnum = i + c->lpt_first; + return 0; + } + + for (i = 0; i < n; i++) + if (c->ltab[i].cmt) { + c->ltab[i].cmt = 0; + *lnum = i + c->lpt_first; + return 0; + } + return -ENOSPC; +} + +/** + * write_cnodes - write cnodes for commit. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int write_cnodes(struct ubifs_info *c) +{ + int lnum, offs, len, from, err, wlen, alen, done_ltab, done_lsave; + struct ubifs_cnode *cnode; + void *buf = c->lpt_buf; + + cnode = c->lpt_cnext; + if (!cnode) + return 0; + lnum = c->nhead_lnum; + offs = c->nhead_offs; + from = offs; + /* Ensure empty LEB is unmapped */ + if (offs == 0) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + /* Try to place lsave and ltab nicely */ + done_lsave = !c->big_lpt; + done_ltab = 0; + if (!done_lsave && offs + c->lsave_sz <= c->leb_size) { + done_lsave = 1; + ubifs_pack_lsave(c, buf + offs, c->lsave); + offs += c->lsave_sz; + dbg_chk_lpt_sz(c, 1, c->lsave_sz); + } + + if (offs + c->ltab_sz <= c->leb_size) { + done_ltab = 1; + ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); + offs += c->ltab_sz; + dbg_chk_lpt_sz(c, 1, c->ltab_sz); + } + + /* Loop for each cnode */ + do { + if (cnode->level) + len = c->nnode_sz; + else + len = c->pnode_sz; + while (offs + len > c->leb_size) { + wlen = offs - from; + if (wlen) { + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, + alen); + if (err) + return err; + } + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); + err = realloc_lpt_leb(c, &lnum); + if (err) + goto no_space; + offs = from = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + /* Try to place lsave and ltab nicely */ + if (!done_lsave) { + done_lsave = 1; + ubifs_pack_lsave(c, buf + offs, c->lsave); + offs += c->lsave_sz; + dbg_chk_lpt_sz(c, 1, c->lsave_sz); + continue; + } + if (!done_ltab) { + done_ltab = 1; + ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); + offs += c->ltab_sz; + dbg_chk_lpt_sz(c, 1, c->ltab_sz); + continue; + } + break; + } + if (cnode->level) + ubifs_pack_nnode(c, buf + offs, + (struct ubifs_nnode *)cnode); + else + ubifs_pack_pnode(c, buf + offs, + (struct ubifs_pnode *)cnode); + /* + * The reason for the barriers is the same as in case of TNC. + * See comment in 'write_index()'. 'dirty_cow_nnode()' and + * 'dirty_cow_pnode()' are the functions for which this is + * important. + */ + clear_bit(DIRTY_CNODE, &cnode->flags); + smp_mb__before_clear_bit(); + clear_bit(COW_CNODE, &cnode->flags); + smp_mb__after_clear_bit(); + offs += len; + dbg_chk_lpt_sz(c, 1, len); + cnode = cnode->cnext; + } while (cnode && cnode != c->lpt_cnext); + + /* Make sure to place LPT's save table */ + if (!done_lsave) { + if (offs + c->lsave_sz > c->leb_size) { + wlen = offs - from; + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, alen); + if (err) + return err; + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); + err = realloc_lpt_leb(c, &lnum); + if (err) + goto no_space; + offs = from = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + done_lsave = 1; + ubifs_pack_lsave(c, buf + offs, c->lsave); + offs += c->lsave_sz; + dbg_chk_lpt_sz(c, 1, c->lsave_sz); + } + + /* Make sure to place LPT's own lprops table */ + if (!done_ltab) { + if (offs + c->ltab_sz > c->leb_size) { + wlen = offs - from; + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, alen); + if (err) + return err; + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); + err = realloc_lpt_leb(c, &lnum); + if (err) + goto no_space; + offs = from = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + done_ltab = 1; + ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); + offs += c->ltab_sz; + dbg_chk_lpt_sz(c, 1, c->ltab_sz); + } + + /* Write remaining data in buffer */ + wlen = offs - from; + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, alen); + if (err) + return err; + + dbg_chk_lpt_sz(c, 4, alen - wlen); + err = dbg_chk_lpt_sz(c, 3, ALIGN(offs, c->min_io_size)); + if (err) + return err; + + c->nhead_lnum = lnum; + c->nhead_offs = ALIGN(offs, c->min_io_size); + + dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); + dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); + dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) + dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); + + return 0; + +no_space: + ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", + lnum, offs, len, done_ltab, done_lsave); + ubifs_dump_lpt_info(c); + ubifs_dump_lpt_lebs(c); + dump_stack(); + return err; +} +#endif + +/** + * next_pnode_to_dirty - find next pnode to dirty. + * @c: UBIFS file-system description object + * @pnode: pnode + * + * This function returns the next pnode to dirty or %NULL if there are no more + * pnodes. Note that pnodes that have never been written (lnum == 0) are + * skipped. + */ +static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, + struct ubifs_pnode *pnode) +{ + struct ubifs_nnode *nnode; + int iip; + + /* Try to go right */ + nnode = pnode->parent; + for (iip = pnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) { + if (nnode->nbranch[iip].lnum) + return ubifs_get_pnode(c, nnode, iip); + } + + /* Go up while can't go right */ + do { + iip = nnode->iip + 1; + nnode = nnode->parent; + if (!nnode) + return NULL; + for (; iip < UBIFS_LPT_FANOUT; iip++) { + if (nnode->nbranch[iip].lnum) + break; + } + } while (iip >= UBIFS_LPT_FANOUT); + + /* Go right */ + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return (void *)nnode; + + /* Go down to level 1 */ + while (nnode->level > 1) { + for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) { + if (nnode->nbranch[iip].lnum) + break; + } + if (iip >= UBIFS_LPT_FANOUT) { + /* + * Should not happen, but we need to keep going + * if it does. + */ + iip = 0; + } + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return (void *)nnode; + } + + for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) + if (nnode->nbranch[iip].lnum) + break; + if (iip >= UBIFS_LPT_FANOUT) + /* Should not happen, but we need to keep going if it does */ + iip = 0; + return ubifs_get_pnode(c, nnode, iip); +} + +/** + * pnode_lookup - lookup a pnode in the LPT. + * @c: UBIFS file-system description object + * @i: pnode number (0 to main_lebs - 1) + * + * This function returns a pointer to the pnode on success or a negative + * error code on failure. + */ +static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i) +{ + int err, h, iip, shft; + struct ubifs_nnode *nnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + i <<= UBIFS_LPT_FANOUT_SHIFT; + nnode = c->nroot; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return ERR_CAST(nnode); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + return ubifs_get_pnode(c, nnode, iip); +} + +/** + * add_pnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @pnode: pnode for which to add dirt + */ +static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, + c->pnode_sz); +} + +/** + * do_make_pnode_dirty - mark a pnode dirty. + * @c: UBIFS file-system description object + * @pnode: pnode to mark dirty + */ +static void do_make_pnode_dirty(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + /* Assumes cnext list is empty i.e. not called during commit */ + if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { + struct ubifs_nnode *nnode; + + c->dirty_pn_cnt += 1; + add_pnode_dirt(c, pnode); + /* Mark parent and ancestors dirty too */ + nnode = pnode->parent; + while (nnode) { + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + nnode = nnode->parent; + } else + break; + } + } +} + +/** + * make_tree_dirty - mark the entire LEB properties tree dirty. + * @c: UBIFS file-system description object + * + * This function is used by the "small" LPT model to cause the entire LEB + * properties tree to be written. The "small" LPT model does not use LPT + * garbage collection because it is more efficient to write the entire tree + * (because it is small). + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_tree_dirty(struct ubifs_info *c) +{ + struct ubifs_pnode *pnode; + + pnode = pnode_lookup(c, 0); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + + while (pnode) { + do_make_pnode_dirty(c, pnode); + pnode = next_pnode_to_dirty(c, pnode); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + } + return 0; +} + +/** + * need_write_all - determine if the LPT area is running out of free space. + * @c: UBIFS file-system description object + * + * This function returns %1 if the LPT area is running out of free space and %0 + * if it is not. + */ +static int need_write_all(struct ubifs_info *c) +{ + long long free = 0; + int i; + + for (i = 0; i < c->lpt_lebs; i++) { + if (i + c->lpt_first == c->nhead_lnum) + free += c->leb_size - c->nhead_offs; + else if (c->ltab[i].free == c->leb_size) + free += c->leb_size; + else if (c->ltab[i].free + c->ltab[i].dirty == c->leb_size) + free += c->leb_size; + } + /* Less than twice the size left */ + if (free <= c->lpt_sz * 2) + return 1; + return 0; +} + +/** + * lpt_tgc_start - start trivial garbage collection of LPT LEBs. + * @c: UBIFS file-system description object + * + * LPT trivial garbage collection is where a LPT LEB contains only dirty and + * free space and so may be reused as soon as the next commit is completed. + * This function is called during start commit to mark LPT LEBs for trivial GC. + */ +static void lpt_tgc_start(struct ubifs_info *c) +{ + int i; + + for (i = 0; i < c->lpt_lebs; i++) { + if (i + c->lpt_first == c->nhead_lnum) + continue; + if (c->ltab[i].dirty > 0 && + c->ltab[i].free + c->ltab[i].dirty == c->leb_size) { + c->ltab[i].tgc = 1; + c->ltab[i].free = c->leb_size; + c->ltab[i].dirty = 0; + dbg_lp("LEB %d", i + c->lpt_first); + } + } +} + +/** + * lpt_tgc_end - end trivial garbage collection of LPT LEBs. + * @c: UBIFS file-system description object + * + * LPT trivial garbage collection is where a LPT LEB contains only dirty and + * free space and so may be reused as soon as the next commit is completed. + * This function is called after the commit is completed (master node has been + * written) and un-maps LPT LEBs that were marked for trivial GC. + */ +static int lpt_tgc_end(struct ubifs_info *c) +{ + int i, err; + + for (i = 0; i < c->lpt_lebs; i++) + if (c->ltab[i].tgc) { + err = ubifs_leb_unmap(c, i + c->lpt_first); + if (err) + return err; + c->ltab[i].tgc = 0; + dbg_lp("LEB %d", i + c->lpt_first); + } + return 0; +} + +/** + * populate_lsave - fill the lsave array with important LEB numbers. + * @c: the UBIFS file-system description object + * + * This function is only called for the "big" model. It records a small number + * of LEB numbers of important LEBs. Important LEBs are ones that are (from + * most important to least important): empty, freeable, freeable index, dirty + * index, dirty or free. Upon mount, we read this list of LEB numbers and bring + * their pnodes into memory. That will stop us from having to scan the LPT + * straight away. For the "small" model we assume that scanning the LPT is no + * big deal. + */ +static void populate_lsave(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + int i, cnt = 0; + + ubifs_assert(c->big_lpt); + if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { + c->lpt_drty_flgs |= LSAVE_DIRTY; + ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); + } + +#ifndef __UBOOT__ + if (dbg_populate_lsave(c)) + return; +#endif + + list_for_each_entry(lprops, &c->empty_list, list) { + c->lsave[cnt++] = lprops->lnum; + if (cnt >= c->lsave_cnt) + return; + } + list_for_each_entry(lprops, &c->freeable_list, list) { + c->lsave[cnt++] = lprops->lnum; + if (cnt >= c->lsave_cnt) + return; + } + list_for_each_entry(lprops, &c->frdi_idx_list, list) { + c->lsave[cnt++] = lprops->lnum; + if (cnt >= c->lsave_cnt) + return; + } + heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; + for (i = 0; i < heap->cnt; i++) { + c->lsave[cnt++] = heap->arr[i]->lnum; + if (cnt >= c->lsave_cnt) + return; + } + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; + for (i = 0; i < heap->cnt; i++) { + c->lsave[cnt++] = heap->arr[i]->lnum; + if (cnt >= c->lsave_cnt) + return; + } + heap = &c->lpt_heap[LPROPS_FREE - 1]; + for (i = 0; i < heap->cnt; i++) { + c->lsave[cnt++] = heap->arr[i]->lnum; + if (cnt >= c->lsave_cnt) + return; + } + /* Fill it up completely */ + while (cnt < c->lsave_cnt) + c->lsave[cnt++] = c->main_first; +} + +/** + * nnode_lookup - lookup a nnode in the LPT. + * @c: UBIFS file-system description object + * @i: nnode number + * + * This function returns a pointer to the nnode on success or a negative + * error code on failure. + */ +static struct ubifs_nnode *nnode_lookup(struct ubifs_info *c, int i) +{ + int err, iip; + struct ubifs_nnode *nnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + nnode = c->nroot; + while (1) { + iip = i & (UBIFS_LPT_FANOUT - 1); + i >>= UBIFS_LPT_FANOUT_SHIFT; + if (!i) + break; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return nnode; + } + return nnode; +} + +/** + * make_nnode_dirty - find a nnode and, if found, make it dirty. + * @c: UBIFS file-system description object + * @node_num: nnode number of nnode to make dirty + * @lnum: LEB number where nnode was written + * @offs: offset where nnode was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_nnode_dirty(struct ubifs_info *c, int node_num, int lnum, + int offs) +{ + struct ubifs_nnode *nnode; + + nnode = nnode_lookup(c, node_num); + if (IS_ERR(nnode)) + return PTR_ERR(nnode); + if (nnode->parent) { + struct ubifs_nbranch *branch; + + branch = &nnode->parent->nbranch[nnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + return 0; /* nnode is obsolete */ + } else if (c->lpt_lnum != lnum || c->lpt_offs != offs) + return 0; /* nnode is obsolete */ + /* Assumes cnext list is empty i.e. not called during commit */ + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + /* Mark parent and ancestors dirty too */ + nnode = nnode->parent; + while (nnode) { + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + nnode = nnode->parent; + } else + break; + } + } + return 0; +} + +/** + * make_pnode_dirty - find a pnode and, if found, make it dirty. + * @c: UBIFS file-system description object + * @node_num: pnode number of pnode to make dirty + * @lnum: LEB number where pnode was written + * @offs: offset where pnode was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum, + int offs) +{ + struct ubifs_pnode *pnode; + struct ubifs_nbranch *branch; + + pnode = pnode_lookup(c, node_num); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + branch = &pnode->parent->nbranch[pnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + return 0; + do_make_pnode_dirty(c, pnode); + return 0; +} + +/** + * make_ltab_dirty - make ltab node dirty. + * @c: UBIFS file-system description object + * @lnum: LEB number where ltab was written + * @offs: offset where ltab was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_ltab_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->ltab_lnum || offs != c->ltab_offs) + return 0; /* This ltab node is obsolete */ + if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { + c->lpt_drty_flgs |= LTAB_DIRTY; + ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); + } + return 0; +} + +/** + * make_lsave_dirty - make lsave node dirty. + * @c: UBIFS file-system description object + * @lnum: LEB number where lsave was written + * @offs: offset where lsave was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_lsave_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->lsave_lnum || offs != c->lsave_offs) + return 0; /* This lsave node is obsolete */ + if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { + c->lpt_drty_flgs |= LSAVE_DIRTY; + ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); + } + return 0; +} + +/** + * make_node_dirty - make node dirty. + * @c: UBIFS file-system description object + * @node_type: LPT node type + * @node_num: node number + * @lnum: LEB number where node was written + * @offs: offset where node was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num, + int lnum, int offs) +{ + switch (node_type) { + case UBIFS_LPT_NNODE: + return make_nnode_dirty(c, node_num, lnum, offs); + case UBIFS_LPT_PNODE: + return make_pnode_dirty(c, node_num, lnum, offs); + case UBIFS_LPT_LTAB: + return make_ltab_dirty(c, lnum, offs); + case UBIFS_LPT_LSAVE: + return make_lsave_dirty(c, lnum, offs); + } + return -EINVAL; +} + +/** + * get_lpt_node_len - return the length of a node based on its type. + * @c: UBIFS file-system description object + * @node_type: LPT node type + */ +static int get_lpt_node_len(const struct ubifs_info *c, int node_type) +{ + switch (node_type) { + case UBIFS_LPT_NNODE: + return c->nnode_sz; + case UBIFS_LPT_PNODE: + return c->pnode_sz; + case UBIFS_LPT_LTAB: + return c->ltab_sz; + case UBIFS_LPT_LSAVE: + return c->lsave_sz; + } + return 0; +} + +/** + * get_pad_len - return the length of padding in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer + * @len: length of buffer + */ +static int get_pad_len(const struct ubifs_info *c, uint8_t *buf, int len) +{ + int offs, pad_len; + + if (c->min_io_size == 1) + return 0; + offs = c->leb_size - len; + pad_len = ALIGN(offs, c->min_io_size) - offs; + return pad_len; +} + +/** + * get_lpt_node_type - return type (and node number) of a node in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer + * @node_num: node number is returned here + */ +static int get_lpt_node_type(const struct ubifs_info *c, uint8_t *buf, + int *node_num) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int pos = 0, node_type; + + node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); + *node_num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); + return node_type; +} + +/** + * is_a_node - determine if a buffer contains a node. + * @c: UBIFS file-system description object + * @buf: buffer + * @len: length of buffer + * + * This function returns %1 if the buffer contains a node or %0 if it does not. + */ +static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int pos = 0, node_type, node_len; + uint16_t crc, calc_crc; + + if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8) + return 0; + node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); + if (node_type == UBIFS_LPT_NOT_A_NODE) + return 0; + node_len = get_lpt_node_len(c, node_type); + if (!node_len || node_len > len) + return 0; + pos = 0; + addr = buf; + crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); + calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + node_len - UBIFS_LPT_CRC_BYTES); + if (crc != calc_crc) + return 0; + return 1; +} + +/** + * lpt_gc_lnum - garbage collect a LPT LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to garbage collect + * + * LPT garbage collection is used only for the "big" LPT model + * (c->big_lpt == 1). Garbage collection simply involves marking all the nodes + * in the LEB being garbage-collected as dirty. The dirty nodes are written + * next commit, after which the LEB is free to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int lpt_gc_lnum(struct ubifs_info *c, int lnum) +{ + int err, len = c->leb_size, node_type, node_num, node_len, offs; + void *buf = c->lpt_buf; + + dbg_lp("LEB %d", lnum); + + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) + return err; + + while (1) { + if (!is_a_node(c, buf, len)) { + int pad_len; + + pad_len = get_pad_len(c, buf, len); + if (pad_len) { + buf += pad_len; + len -= pad_len; + continue; + } + return 0; + } + node_type = get_lpt_node_type(c, buf, &node_num); + node_len = get_lpt_node_len(c, node_type); + offs = c->leb_size - len; + ubifs_assert(node_len != 0); + mutex_lock(&c->lp_mutex); + err = make_node_dirty(c, node_type, node_num, lnum, offs); + mutex_unlock(&c->lp_mutex); + if (err) + return err; + buf += node_len; + len -= node_len; + } + return 0; +} + +/** + * lpt_gc - LPT garbage collection. + * @c: UBIFS file-system description object + * + * Select a LPT LEB for LPT garbage collection and call 'lpt_gc_lnum()'. + * Returns %0 on success and a negative error code on failure. + */ +static int lpt_gc(struct ubifs_info *c) +{ + int i, lnum = -1, dirty = 0; + + mutex_lock(&c->lp_mutex); + for (i = 0; i < c->lpt_lebs; i++) { + ubifs_assert(!c->ltab[i].tgc); + if (i + c->lpt_first == c->nhead_lnum || + c->ltab[i].free + c->ltab[i].dirty == c->leb_size) + continue; + if (c->ltab[i].dirty > dirty) { + dirty = c->ltab[i].dirty; + lnum = i + c->lpt_first; + } + } + mutex_unlock(&c->lp_mutex); + if (lnum == -1) + return -ENOSPC; + return lpt_gc_lnum(c, lnum); +} + +/** + * ubifs_lpt_start_commit - UBIFS commit starts. + * @c: the UBIFS file-system description object + * + * This function has to be called when UBIFS starts the commit operation. + * This function "freezes" all currently dirty LEB properties and does not + * change them anymore. Further changes are saved and tracked separately + * because they are not part of this commit. This function returns zero in case + * of success and a negative error code in case of failure. + */ +int ubifs_lpt_start_commit(struct ubifs_info *c) +{ + int err, cnt; + + dbg_lp(""); + + mutex_lock(&c->lp_mutex); + err = dbg_chk_lpt_free_spc(c); + if (err) + goto out; + err = dbg_check_ltab(c); + if (err) + goto out; + + if (c->check_lpt_free) { + /* + * We ensure there is enough free space in + * ubifs_lpt_post_commit() by marking nodes dirty. That + * information is lost when we unmount, so we also need + * to check free space once after mounting also. + */ + c->check_lpt_free = 0; + while (need_write_all(c)) { + mutex_unlock(&c->lp_mutex); + err = lpt_gc(c); + if (err) + return err; + mutex_lock(&c->lp_mutex); + } + } + + lpt_tgc_start(c); + + if (!c->dirty_pn_cnt) { + dbg_cmt("no cnodes to commit"); + err = 0; + goto out; + } + + if (!c->big_lpt && need_write_all(c)) { + /* If needed, write everything */ + err = make_tree_dirty(c); + if (err) + goto out; + lpt_tgc_start(c); + } + + if (c->big_lpt) + populate_lsave(c); + + cnt = get_cnodes_to_commit(c); + ubifs_assert(cnt != 0); + + err = layout_cnodes(c); + if (err) + goto out; + + /* Copy the LPT's own lprops for end commit to write */ + memcpy(c->ltab_cmt, c->ltab, + sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + c->lpt_drty_flgs &= ~(LTAB_DIRTY | LSAVE_DIRTY); + +out: + mutex_unlock(&c->lp_mutex); + return err; +} + /** * free_obsolete_cnodes - free obsolete cnodes for commit end. * @c: UBIFS file-system description object @@ -50,6 +1314,65 @@ static void free_obsolete_cnodes(struct ubifs_info *c) c->lpt_cnext = NULL; } +#ifndef __UBOOT__ +/** + * ubifs_lpt_end_commit - finish the commit operation. + * @c: the UBIFS file-system description object + * + * This function has to be called when the commit operation finishes. It + * flushes the changes which were "frozen" by 'ubifs_lprops_start_commit()' to + * the media. Returns zero in case of success and a negative error code in case + * of failure. + */ +int ubifs_lpt_end_commit(struct ubifs_info *c) +{ + int err; + + dbg_lp(""); + + if (!c->lpt_cnext) + return 0; + + err = write_cnodes(c); + if (err) + return err; + + mutex_lock(&c->lp_mutex); + free_obsolete_cnodes(c); + mutex_unlock(&c->lp_mutex); + + return 0; +} +#endif + +/** + * ubifs_lpt_post_commit - post commit LPT trivial GC and LPT GC. + * @c: UBIFS file-system description object + * + * LPT trivial GC is completed after a commit. Also LPT GC is done after a + * commit for the "big" LPT model. + */ +int ubifs_lpt_post_commit(struct ubifs_info *c) +{ + int err; + + mutex_lock(&c->lp_mutex); + err = lpt_tgc_end(c); + if (err) + goto out; + if (c->big_lpt) + while (need_write_all(c)) { + mutex_unlock(&c->lp_mutex); + err = lpt_gc(c); + if (err) + return err; + mutex_lock(&c->lp_mutex); + } +out: + mutex_unlock(&c->lp_mutex); + return err; +} + /** * first_nnode - find the first nnode in memory. * @c: UBIFS file-system description object @@ -169,3 +1492,549 @@ void ubifs_lpt_free(struct ubifs_info *c, int wr_only) vfree(c->ltab); kfree(c->lpt_nod_buf); } + +#ifndef __UBOOT__ +/* + * Everything below is related to debugging. + */ + +/** + * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes. + * @buf: buffer + * @len: buffer length + */ +static int dbg_is_all_ff(uint8_t *buf, int len) +{ + int i; + + for (i = 0; i < len; i++) + if (buf[i] != 0xff) + return 0; + return 1; +} + +/** + * dbg_is_nnode_dirty - determine if a nnode is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where nnode was written + * @offs: offset where nnode was written + */ +static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs) +{ + struct ubifs_nnode *nnode; + int hght; + + /* Entire tree is in memory so first_nnode / next_nnode are OK */ + nnode = first_nnode(c, &hght); + for (; nnode; nnode = next_nnode(c, nnode, &hght)) { + struct ubifs_nbranch *branch; + + cond_resched(); + if (nnode->parent) { + branch = &nnode->parent->nbranch[nnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + continue; + if (test_bit(DIRTY_CNODE, &nnode->flags)) + return 1; + return 0; + } else { + if (c->lpt_lnum != lnum || c->lpt_offs != offs) + continue; + if (test_bit(DIRTY_CNODE, &nnode->flags)) + return 1; + return 0; + } + } + return 1; +} + +/** + * dbg_is_pnode_dirty - determine if a pnode is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where pnode was written + * @offs: offset where pnode was written + */ +static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs) +{ + int i, cnt; + + cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); + for (i = 0; i < cnt; i++) { + struct ubifs_pnode *pnode; + struct ubifs_nbranch *branch; + + cond_resched(); + pnode = pnode_lookup(c, i); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + branch = &pnode->parent->nbranch[pnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + continue; + if (test_bit(DIRTY_CNODE, &pnode->flags)) + return 1; + return 0; + } + return 1; +} + +/** + * dbg_is_ltab_dirty - determine if a ltab node is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where ltab node was written + * @offs: offset where ltab node was written + */ +static int dbg_is_ltab_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->ltab_lnum || offs != c->ltab_offs) + return 1; + return (c->lpt_drty_flgs & LTAB_DIRTY) != 0; +} + +/** + * dbg_is_lsave_dirty - determine if a lsave node is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where lsave node was written + * @offs: offset where lsave node was written + */ +static int dbg_is_lsave_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->lsave_lnum || offs != c->lsave_offs) + return 1; + return (c->lpt_drty_flgs & LSAVE_DIRTY) != 0; +} + +/** + * dbg_is_node_dirty - determine if a node is dirty. + * @c: the UBIFS file-system description object + * @node_type: node type + * @lnum: LEB number where node was written + * @offs: offset where node was written + */ +static int dbg_is_node_dirty(struct ubifs_info *c, int node_type, int lnum, + int offs) +{ + switch (node_type) { + case UBIFS_LPT_NNODE: + return dbg_is_nnode_dirty(c, lnum, offs); + case UBIFS_LPT_PNODE: + return dbg_is_pnode_dirty(c, lnum, offs); + case UBIFS_LPT_LTAB: + return dbg_is_ltab_dirty(c, lnum, offs); + case UBIFS_LPT_LSAVE: + return dbg_is_lsave_dirty(c, lnum, offs); + } + return 1; +} + +/** + * dbg_check_ltab_lnum - check the ltab for a LPT LEB number. + * @c: the UBIFS file-system description object + * @lnum: LEB number where node was written + * @offs: offset where node was written + * + * This function returns %0 on success and a negative error code on failure. + */ +static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) +{ + int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; + int ret; + void *buf, *p; + + if (!dbg_is_chk_lprops(c)) + return 0; + + buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) { + ubifs_err("cannot allocate memory for ltab checking"); + return 0; + } + + dbg_lp("LEB %d", lnum); + + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) + goto out; + + while (1) { + if (!is_a_node(c, p, len)) { + int i, pad_len; + + pad_len = get_pad_len(c, p, len); + if (pad_len) { + p += pad_len; + len -= pad_len; + dirty += pad_len; + continue; + } + if (!dbg_is_all_ff(p, len)) { + ubifs_err("invalid empty space in LEB %d at %d", + lnum, c->leb_size - len); + err = -EINVAL; + } + i = lnum - c->lpt_first; + if (len != c->ltab[i].free) { + ubifs_err("invalid free space in LEB %d (free %d, expected %d)", + lnum, len, c->ltab[i].free); + err = -EINVAL; + } + if (dirty != c->ltab[i].dirty) { + ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)", + lnum, dirty, c->ltab[i].dirty); + err = -EINVAL; + } + goto out; + } + node_type = get_lpt_node_type(c, p, &node_num); + node_len = get_lpt_node_len(c, node_type); + ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len); + if (ret == 1) + dirty += node_len; + p += node_len; + len -= node_len; + } + + err = 0; +out: + vfree(buf); + return err; +} + +/** + * dbg_check_ltab - check the free and dirty space in the ltab. + * @c: the UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_ltab(struct ubifs_info *c) +{ + int lnum, err, i, cnt; + + if (!dbg_is_chk_lprops(c)) + return 0; + + /* Bring the entire tree into memory */ + cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); + for (i = 0; i < cnt; i++) { + struct ubifs_pnode *pnode; + + pnode = pnode_lookup(c, i); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + cond_resched(); + } + + /* Check nodes */ + err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)c->nroot, 0, 0); + if (err) + return err; + + /* Check each LEB */ + for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { + err = dbg_check_ltab_lnum(c, lnum); + if (err) { + ubifs_err("failed at LEB %d", lnum); + return err; + } + } + + dbg_lp("succeeded"); + return 0; +} + +/** + * dbg_chk_lpt_free_spc - check LPT free space is enough to write entire LPT. + * @c: the UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_chk_lpt_free_spc(struct ubifs_info *c) +{ + long long free = 0; + int i; + + if (!dbg_is_chk_lprops(c)) + return 0; + + for (i = 0; i < c->lpt_lebs; i++) { + if (c->ltab[i].tgc || c->ltab[i].cmt) + continue; + if (i + c->lpt_first == c->nhead_lnum) + free += c->leb_size - c->nhead_offs; + else if (c->ltab[i].free == c->leb_size) + free += c->leb_size; + } + if (free < c->lpt_sz) { + ubifs_err("LPT space error: free %lld lpt_sz %lld", + free, c->lpt_sz); + ubifs_dump_lpt_info(c); + ubifs_dump_lpt_lebs(c); + dump_stack(); + return -EINVAL; + } + return 0; +} + +/** + * dbg_chk_lpt_sz - check LPT does not write more than LPT size. + * @c: the UBIFS file-system description object + * @action: what to do + * @len: length written + * + * This function returns %0 on success and a negative error code on failure. + * The @action argument may be one of: + * o %0 - LPT debugging checking starts, initialize debugging variables; + * o %1 - wrote an LPT node, increase LPT size by @len bytes; + * o %2 - switched to a different LEB and wasted @len bytes; + * o %3 - check that we've written the right number of bytes. + * o %4 - wasted @len bytes; + */ +int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) +{ + struct ubifs_debug_info *d = c->dbg; + long long chk_lpt_sz, lpt_sz; + int err = 0; + + if (!dbg_is_chk_lprops(c)) + return 0; + + switch (action) { + case 0: + d->chk_lpt_sz = 0; + d->chk_lpt_sz2 = 0; + d->chk_lpt_lebs = 0; + d->chk_lpt_wastage = 0; + if (c->dirty_pn_cnt > c->pnode_cnt) { + ubifs_err("dirty pnodes %d exceed max %d", + c->dirty_pn_cnt, c->pnode_cnt); + err = -EINVAL; + } + if (c->dirty_nn_cnt > c->nnode_cnt) { + ubifs_err("dirty nnodes %d exceed max %d", + c->dirty_nn_cnt, c->nnode_cnt); + err = -EINVAL; + } + return err; + case 1: + d->chk_lpt_sz += len; + return 0; + case 2: + d->chk_lpt_sz += len; + d->chk_lpt_wastage += len; + d->chk_lpt_lebs += 1; + return 0; + case 3: + chk_lpt_sz = c->leb_size; + chk_lpt_sz *= d->chk_lpt_lebs; + chk_lpt_sz += len - c->nhead_offs; + if (d->chk_lpt_sz != chk_lpt_sz) { + ubifs_err("LPT wrote %lld but space used was %lld", + d->chk_lpt_sz, chk_lpt_sz); + err = -EINVAL; + } + if (d->chk_lpt_sz > c->lpt_sz) { + ubifs_err("LPT wrote %lld but lpt_sz is %lld", + d->chk_lpt_sz, c->lpt_sz); + err = -EINVAL; + } + if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) { + ubifs_err("LPT layout size %lld but wrote %lld", + d->chk_lpt_sz, d->chk_lpt_sz2); + err = -EINVAL; + } + if (d->chk_lpt_sz2 && d->new_nhead_offs != len) { + ubifs_err("LPT new nhead offs: expected %d was %d", + d->new_nhead_offs, len); + err = -EINVAL; + } + lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; + lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; + lpt_sz += c->ltab_sz; + if (c->big_lpt) + lpt_sz += c->lsave_sz; + if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) { + ubifs_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", + d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz); + err = -EINVAL; + } + if (err) { + ubifs_dump_lpt_info(c); + ubifs_dump_lpt_lebs(c); + dump_stack(); + } + d->chk_lpt_sz2 = d->chk_lpt_sz; + d->chk_lpt_sz = 0; + d->chk_lpt_wastage = 0; + d->chk_lpt_lebs = 0; + d->new_nhead_offs = len; + return err; + case 4: + d->chk_lpt_sz += len; + d->chk_lpt_wastage += len; + return 0; + default: + return -EINVAL; + } +} + +/** + * ubifs_dump_lpt_leb - dump an LPT LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to dump + * + * This function dumps an LEB from LPT area. Nodes in this area are very + * different to nodes in the main area (e.g., they do not have common headers, + * they do not have 8-byte alignments, etc), so we have a separate function to + * dump LPT area LEBs. Note, LPT has to be locked by the caller. + */ +static void dump_lpt_leb(const struct ubifs_info *c, int lnum) +{ + int err, len = c->leb_size, node_type, node_num, node_len, offs; + void *buf, *p; + + pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); + buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) { + ubifs_err("cannot allocate memory to dump LPT"); + return; + } + + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) + goto out; + + while (1) { + offs = c->leb_size - len; + if (!is_a_node(c, p, len)) { + int pad_len; + + pad_len = get_pad_len(c, p, len); + if (pad_len) { + pr_err("LEB %d:%d, pad %d bytes\n", + lnum, offs, pad_len); + p += pad_len; + len -= pad_len; + continue; + } + if (len) + pr_err("LEB %d:%d, free %d bytes\n", + lnum, offs, len); + break; + } + + node_type = get_lpt_node_type(c, p, &node_num); + switch (node_type) { + case UBIFS_LPT_PNODE: + { + node_len = c->pnode_sz; + if (c->big_lpt) + pr_err("LEB %d:%d, pnode num %d\n", + lnum, offs, node_num); + else + pr_err("LEB %d:%d, pnode\n", lnum, offs); + break; + } + case UBIFS_LPT_NNODE: + { + int i; + struct ubifs_nnode nnode; + + node_len = c->nnode_sz; + if (c->big_lpt) + pr_err("LEB %d:%d, nnode num %d, ", + lnum, offs, node_num); + else + pr_err("LEB %d:%d, nnode, ", + lnum, offs); + err = ubifs_unpack_nnode(c, p, &nnode); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + pr_cont("%d:%d", nnode.nbranch[i].lnum, + nnode.nbranch[i].offs); + if (i != UBIFS_LPT_FANOUT - 1) + pr_cont(", "); + } + pr_cont("\n"); + break; + } + case UBIFS_LPT_LTAB: + node_len = c->ltab_sz; + pr_err("LEB %d:%d, ltab\n", lnum, offs); + break; + case UBIFS_LPT_LSAVE: + node_len = c->lsave_sz; + pr_err("LEB %d:%d, lsave len\n", lnum, offs); + break; + default: + ubifs_err("LPT node type %d not recognized", node_type); + goto out; + } + + p += node_len; + len -= node_len; + } + + pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); +out: + vfree(buf); + return; +} + +/** + * ubifs_dump_lpt_lebs - dump LPT lebs. + * @c: UBIFS file-system description object + * + * This function dumps all LPT LEBs. The caller has to make sure the LPT is + * locked. + */ +void ubifs_dump_lpt_lebs(const struct ubifs_info *c) +{ + int i; + + pr_err("(pid %d) start dumping all LPT LEBs\n", current->pid); + for (i = 0; i < c->lpt_lebs; i++) + dump_lpt_leb(c, i + c->lpt_first); + pr_err("(pid %d) finish dumping all LPT LEBs\n", current->pid); +} + +/** + * dbg_populate_lsave - debugging version of 'populate_lsave()' + * @c: UBIFS file-system description object + * + * This is a debugging version for 'populate_lsave()' which populates lsave + * with random LEBs instead of useful LEBs, which is good for test coverage. + * Returns zero if lsave has not been populated (this debugging feature is + * disabled) an non-zero if lsave has been populated. + */ +static int dbg_populate_lsave(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + int i; + + if (!dbg_is_chk_gen(c)) + return 0; + if (prandom_u32() & 3) + return 0; + + for (i = 0; i < c->lsave_cnt; i++) + c->lsave[i] = c->main_first; + + list_for_each_entry(lprops, &c->empty_list, list) + c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum; + list_for_each_entry(lprops, &c->freeable_list, list) + c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum; + list_for_each_entry(lprops, &c->frdi_idx_list, list) + c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum; + + heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; + for (i = 0; i < heap->cnt; i++) + c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum; + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; + for (i = 0; i < heap->cnt; i++) + c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum; + heap = &c->lpt_heap[LPROPS_FREE - 1]; + for (i = 0; i < heap->cnt; i++) + c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum; + + return 1; +} +#endif diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 3f2926e..00ca855 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -22,14 +11,21 @@ /* This file implements reading and writing the master node */ +#define __UBOOT__ #include "ubifs.h" +#ifdef __UBOOT__ +#include <linux/compat.h> +#include <linux/err.h> +#include <ubi_uboot.h> +#endif /** * scan_for_master - search the valid master node. * @c: UBIFS file-system description object * * This function scans the master node LEBs and search for the latest master - * node. Returns zero in case of success and a negative error code in case of + * node. Returns zero in case of success, %-EUCLEAN if there master area is + * corrupted and requires recovery, and a negative error code in case of * failure. */ static int scan_for_master(struct ubifs_info *c) @@ -40,7 +36,7 @@ static int scan_for_master(struct ubifs_info *c) lnum = UBIFS_MST_LNUM; - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) return PTR_ERR(sleb); nodes_cnt = sleb->nodes_cnt; @@ -48,7 +44,7 @@ static int scan_for_master(struct ubifs_info *c) snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); if (snod->type != UBIFS_MST_NODE) - goto out; + goto out_dump; memcpy(c->mst_node, snod->node, snod->len); offs = snod->offs; } @@ -56,7 +52,7 @@ static int scan_for_master(struct ubifs_info *c) lnum += 1; - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) return PTR_ERR(sleb); if (sleb->nodes_cnt != nodes_cnt) @@ -65,7 +61,7 @@ static int scan_for_master(struct ubifs_info *c) goto out; snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); if (snod->type != UBIFS_MST_NODE) - goto out; + goto out_dump; if (snod->offs != offs) goto out; if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, @@ -78,6 +74,12 @@ static int scan_for_master(struct ubifs_info *c) out: ubifs_scan_destroy(sleb); + return -EUCLEAN; + +out_dump: + ubifs_err("unexpected node type %d master LEB %d:%d", + snod->type, lnum, snod->offs); + ubifs_scan_destroy(sleb); return -EINVAL; } @@ -141,7 +143,7 @@ static int validate_master(const struct ubifs_info *c) } main_sz = (long long)c->main_lebs * c->leb_size; - if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { + if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) { err = 9; goto out; } @@ -211,7 +213,7 @@ static int validate_master(const struct ubifs_info *c) } if (c->lst.total_dead + c->lst.total_dark + - c->lst.total_used + c->old_idx_sz > main_sz) { + c->lst.total_used + c->bi.old_idx_sz > main_sz) { err = 21; goto out; } @@ -234,7 +236,7 @@ static int validate_master(const struct ubifs_info *c) out: ubifs_err("bad master node at offset %d error %d", c->mst_offs, err); - dbg_dump_node(c, c->mst_node); + ubifs_dump_node(c, c->mst_node); return -EINVAL; } @@ -256,7 +258,8 @@ int ubifs_read_master(struct ubifs_info *c) err = scan_for_master(c); if (err) { - err = ubifs_recover_master_node(c); + if (err == -EUCLEAN) + err = ubifs_recover_master_node(c); if (err) /* * Note, we do not free 'c->mst_node' here because the @@ -278,7 +281,7 @@ int ubifs_read_master(struct ubifs_info *c) c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); - c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); + c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size); c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); @@ -297,7 +300,7 @@ int ubifs_read_master(struct ubifs_info *c) c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); - c->calc_idx_sz = c->old_idx_sz; + c->calc_idx_sz = c->bi.old_idx_sz; if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) c->no_orphs = 1; @@ -309,7 +312,7 @@ int ubifs_read_master(struct ubifs_info *c) if (c->leb_cnt < old_leb_cnt || c->leb_cnt < UBIFS_MIN_LEB_CNT) { ubifs_err("bad leb_cnt on master node"); - dbg_dump_node(c, c->mst_node); + ubifs_dump_node(c, c->mst_node); return -EINVAL; } @@ -335,7 +338,58 @@ int ubifs_read_master(struct ubifs_info *c) if (err) return err; +#ifndef __UBOOT__ err = dbg_old_index_check_init(c, &c->zroot); +#endif + + return err; +} + +#ifndef __UBOOT__ +/** + * ubifs_write_master - write master node. + * @c: UBIFS file-system description object + * + * This function writes the master node. The caller has to take the + * @c->mst_mutex lock before calling this function. Returns zero in case of + * success and a negative error code in case of failure. The master node is + * written twice to enable recovery. + */ +int ubifs_write_master(struct ubifs_info *c) +{ + int err, lnum, offs, len; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + + lnum = UBIFS_MST_LNUM; + offs = c->mst_offs + c->mst_node_alsz; + len = UBIFS_MST_NODE_SZ; + + if (offs + UBIFS_MST_NODE_SZ > c->leb_size) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + offs = 0; + } + + c->mst_offs = offs; + c->mst_node->highest_inum = cpu_to_le64(c->highest_inum); + + err = ubifs_write_node(c, c->mst_node, len, lnum, offs); + if (err) + return err; + + lnum += 1; + + if (offs == 0) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + err = ubifs_write_node(c, c->mst_node, len, lnum, offs); return err; } +#endif diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 609232e..4316d3c 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -27,6 +16,7 @@ #ifndef __UBIFS_MISC_H__ #define __UBIFS_MISC_H__ +#define __UBOOT__ /** * ubifs_zn_dirty - check if znode is dirty. * @znode: znode to check @@ -39,6 +29,29 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) } /** + * ubifs_zn_obsolete - check if znode is obsolete. + * @znode: znode to check + * + * This helper function returns %1 if @znode is obsolete and %0 otherwise. + */ +static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode) +{ + return !!test_bit(OBSOLETE_ZNODE, &znode->flags); +} + +/** + * ubifs_zn_cow - check if znode has to be copied on write. + * @znode: znode to check + * + * This helper function returns %1 if @znode is has COW flag set and %0 + * otherwise. + */ +static inline int ubifs_zn_cow(const struct ubifs_znode *znode) +{ + return !!test_bit(COW_ZNODE, &znode->flags); +} + +/** * ubifs_wake_up_bgt - wake up background thread. * @c: UBIFS file-system description object */ @@ -121,82 +134,27 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) return err; } +#ifndef __UBOOT__ /** - * ubifs_leb_unmap - unmap an LEB. - * @c: UBIFS file-system description object - * @lnum: LEB number to unmap - * - * This function returns %0 on success and a negative error code on failure. - */ -static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) -{ - int err; - - if (c->ro_media) - return -EROFS; - err = ubi_leb_unmap(c->ubi, lnum); - if (err) { - ubifs_err("unmap LEB %d failed, error %d", lnum, err); - return err; - } - - return 0; -} - -/** - * ubifs_leb_write - write to a LEB. - * @c: UBIFS file-system description object - * @lnum: LEB number to write - * @buf: buffer to write from - * @offs: offset within LEB to write to - * @len: length to write - * @dtype: data type - * - * This function returns %0 on success and a negative error code on failure. - */ -static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, - const void *buf, int offs, int len, int dtype) -{ - int err; - - if (c->ro_media) - return -EROFS; - err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); - if (err) { - ubifs_err("writing %d bytes at %d:%d, error %d", - len, lnum, offs, err); - return err; - } - - return 0; -} - -/** - * ubifs_leb_change - atomic LEB change. - * @c: UBIFS file-system description object - * @lnum: LEB number to write - * @buf: buffer to write from - * @len: length to write - * @dtype: data type + * ubifs_encode_dev - encode device node IDs. + * @dev: UBIFS device node information + * @rdev: device IDs to encode * - * This function returns %0 on success and a negative error code on failure. + * This is a helper function which encodes major/minor numbers of a device node + * into UBIFS device node description. We use standard Linux "new" and "huge" + * encodings. */ -static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, - const void *buf, int len, int dtype) +static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev) { - int err; - - if (c->ro_media) - return -EROFS; - err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); - if (err) { - ubifs_err("changing %d bytes in LEB %d, error %d", - len, lnum, err); - return err; + if (new_valid_dev(rdev)) { + dev->new = cpu_to_le32(new_encode_dev(rdev)); + return sizeof(dev->new); + } else { + dev->huge = cpu_to_le64(huge_encode_dev(rdev)); + return sizeof(dev->huge); } - - return 0; } +#endif /** * ubifs_add_dirt - add dirty space to LEB properties. @@ -260,8 +218,24 @@ struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c, static inline void *ubifs_idx_key(const struct ubifs_info *c, const struct ubifs_idx_node *idx) { - const __u8 *branch = idx->branches; - return (void *)((struct ubifs_branch *)branch)->key; +#ifndef __UBOOT__ + return (void *)((struct ubifs_branch *)idx->branches)->key; +#else + struct ubifs_branch *tmp; + + tmp = (struct ubifs_branch *)idx->branches; + return (void *)tmp->key; +#endif +} + +/** + * ubifs_current_time - round current time to time granularity. + * @inode: inode + */ +static inline struct timespec ubifs_current_time(struct inode *inode) +{ + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; } /** @@ -308,4 +282,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c) mutex_unlock(&c->lp_mutex); } +/** + * ubifs_next_log_lnum - switch to the next log LEB. + * @c: UBIFS file-system description object + * @lnum: current log LEB + * + * This helper function returns the log LEB number which goes next after LEB + * 'lnum'. + */ +static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum) +{ + lnum += 1; + if (lnum > c->log_last) + lnum = UBIFS_LOG_LNUM; + + return lnum; +} + #endif /* __UBIFS_MISC_H__ */ diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index d091031..4e42879 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -3,22 +3,12 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Author: Adrian Hunter */ +#include <linux/err.h> #include "ubifs.h" /* @@ -52,6 +42,166 @@ * than the maximum number of orphans allowed. */ +static int dbg_check_orphans(struct ubifs_info *c); + +/** + * ubifs_add_orphan - add an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * Add an orphan. This function is called when an inodes link count drops to + * zero. + */ +int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *orphan, *o; + struct rb_node **p, *parent = NULL; + + orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS); + if (!orphan) + return -ENOMEM; + orphan->inum = inum; + orphan->new = 1; + + spin_lock(&c->orphan_lock); + if (c->tot_orphans >= c->max_orphans) { + spin_unlock(&c->orphan_lock); + kfree(orphan); + return -ENFILE; + } + p = &c->orph_tree.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_orphan, rb); + if (inum < o->inum) + p = &(*p)->rb_left; + else if (inum > o->inum) + p = &(*p)->rb_right; + else { + ubifs_err("orphaned twice"); + spin_unlock(&c->orphan_lock); + kfree(orphan); + return 0; + } + } + c->tot_orphans += 1; + c->new_orphans += 1; + rb_link_node(&orphan->rb, parent, p); + rb_insert_color(&orphan->rb, &c->orph_tree); + list_add_tail(&orphan->list, &c->orph_list); + list_add_tail(&orphan->new_list, &c->orph_new); + spin_unlock(&c->orphan_lock); + dbg_gen("ino %lu", (unsigned long)inum); + return 0; +} + +/** + * ubifs_delete_orphan - delete an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * Delete an orphan. This function is called when an inode is deleted. + */ +void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *o; + struct rb_node *p; + + spin_lock(&c->orphan_lock); + p = c->orph_tree.rb_node; + while (p) { + o = rb_entry(p, struct ubifs_orphan, rb); + if (inum < o->inum) + p = p->rb_left; + else if (inum > o->inum) + p = p->rb_right; + else { + if (o->del) { + spin_unlock(&c->orphan_lock); + dbg_gen("deleted twice ino %lu", + (unsigned long)inum); + return; + } + if (o->cmt) { + o->del = 1; + o->dnext = c->orph_dnext; + c->orph_dnext = o; + spin_unlock(&c->orphan_lock); + dbg_gen("delete later ino %lu", + (unsigned long)inum); + return; + } + rb_erase(p, &c->orph_tree); + list_del(&o->list); + c->tot_orphans -= 1; + if (o->new) { + list_del(&o->new_list); + c->new_orphans -= 1; + } + spin_unlock(&c->orphan_lock); + kfree(o); + dbg_gen("inum %lu", (unsigned long)inum); + return; + } + } + spin_unlock(&c->orphan_lock); + ubifs_err("missing orphan ino %lu", (unsigned long)inum); + dump_stack(); +} + +/** + * ubifs_orphan_start_commit - start commit of orphans. + * @c: UBIFS file-system description object + * + * Start commit of orphans. + */ +int ubifs_orphan_start_commit(struct ubifs_info *c) +{ + struct ubifs_orphan *orphan, **last; + + spin_lock(&c->orphan_lock); + last = &c->orph_cnext; + list_for_each_entry(orphan, &c->orph_new, new_list) { + ubifs_assert(orphan->new); + ubifs_assert(!orphan->cmt); + orphan->new = 0; + orphan->cmt = 1; + *last = orphan; + last = &orphan->cnext; + } + *last = NULL; + c->cmt_orphans = c->new_orphans; + c->new_orphans = 0; + dbg_cmt("%d orphans to commit", c->cmt_orphans); + INIT_LIST_HEAD(&c->orph_new); + if (c->tot_orphans == 0) + c->no_orphs = 1; + else + c->no_orphs = 0; + spin_unlock(&c->orphan_lock); + return 0; +} + +/** + * avail_orphs - calculate available space. + * @c: UBIFS file-system description object + * + * This function returns the number of orphans that can be written in the + * available space. + */ +static int avail_orphs(struct ubifs_info *c) +{ + int avail_lebs, avail, gap; + + avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1; + avail = avail_lebs * + ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); + gap = c->leb_size - c->ohead_offs; + if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64)) + avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); + return avail; +} + /** * tot_avail_orphs - calculate total space. * @c: UBIFS file-system description object @@ -70,6 +220,256 @@ static int tot_avail_orphs(struct ubifs_info *c) } /** + * do_write_orph_node - write a node to the orphan head. + * @c: UBIFS file-system description object + * @len: length of node + * @atomic: write atomically + * + * This function writes a node to the orphan head from the orphan buffer. If + * %atomic is not zero, then the write is done atomically. On success, %0 is + * returned, otherwise a negative error code is returned. + */ +static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) +{ + int err = 0; + + if (atomic) { + ubifs_assert(c->ohead_offs == 0); + ubifs_prepare_node(c, c->orph_buf, len, 1); + len = ALIGN(len, c->min_io_size); + err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len); + } else { + if (c->ohead_offs == 0) { + /* Ensure LEB has been unmapped */ + err = ubifs_leb_unmap(c, c->ohead_lnum); + if (err) + return err; + } + err = ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum, + c->ohead_offs); + } + return err; +} + +/** + * write_orph_node - write an orphan node. + * @c: UBIFS file-system description object + * @atomic: write atomically + * + * This function builds an orphan node from the cnext list and writes it to the + * orphan head. On success, %0 is returned, otherwise a negative error code + * is returned. + */ +static int write_orph_node(struct ubifs_info *c, int atomic) +{ + struct ubifs_orphan *orphan, *cnext; + struct ubifs_orph_node *orph; + int gap, err, len, cnt, i; + + ubifs_assert(c->cmt_orphans > 0); + gap = c->leb_size - c->ohead_offs; + if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) { + c->ohead_lnum += 1; + c->ohead_offs = 0; + gap = c->leb_size; + if (c->ohead_lnum > c->orph_last) { + /* + * We limit the number of orphans so that this should + * never happen. + */ + ubifs_err("out of space in orphan area"); + return -EINVAL; + } + } + cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); + if (cnt > c->cmt_orphans) + cnt = c->cmt_orphans; + len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64); + ubifs_assert(c->orph_buf); + orph = c->orph_buf; + orph->ch.node_type = UBIFS_ORPH_NODE; + spin_lock(&c->orphan_lock); + cnext = c->orph_cnext; + for (i = 0; i < cnt; i++) { + orphan = cnext; + ubifs_assert(orphan->cmt); + orph->inos[i] = cpu_to_le64(orphan->inum); + orphan->cmt = 0; + cnext = orphan->cnext; + orphan->cnext = NULL; + } + c->orph_cnext = cnext; + c->cmt_orphans -= cnt; + spin_unlock(&c->orphan_lock); + if (c->cmt_orphans) + orph->cmt_no = cpu_to_le64(c->cmt_no); + else + /* Mark the last node of the commit */ + orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63)); + ubifs_assert(c->ohead_offs + len <= c->leb_size); + ubifs_assert(c->ohead_lnum >= c->orph_first); + ubifs_assert(c->ohead_lnum <= c->orph_last); + err = do_write_orph_node(c, len, atomic); + c->ohead_offs += ALIGN(len, c->min_io_size); + c->ohead_offs = ALIGN(c->ohead_offs, 8); + return err; +} + +/** + * write_orph_nodes - write orphan nodes until there are no more to commit. + * @c: UBIFS file-system description object + * @atomic: write atomically + * + * This function writes orphan nodes for all the orphans to commit. On success, + * %0 is returned, otherwise a negative error code is returned. + */ +static int write_orph_nodes(struct ubifs_info *c, int atomic) +{ + int err; + + while (c->cmt_orphans > 0) { + err = write_orph_node(c, atomic); + if (err) + return err; + } + if (atomic) { + int lnum; + + /* Unmap any unused LEBs after consolidation */ + lnum = c->ohead_lnum + 1; + for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + } + return 0; +} + +/** + * consolidate - consolidate the orphan area. + * @c: UBIFS file-system description object + * + * This function enables consolidation by putting all the orphans into the list + * to commit. The list is in the order that the orphans were added, and the + * LEBs are written atomically in order, so at no time can orphans be lost by + * an unclean unmount. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int consolidate(struct ubifs_info *c) +{ + int tot_avail = tot_avail_orphs(c), err = 0; + + spin_lock(&c->orphan_lock); + dbg_cmt("there is space for %d orphans and there are %d", + tot_avail, c->tot_orphans); + if (c->tot_orphans - c->new_orphans <= tot_avail) { + struct ubifs_orphan *orphan, **last; + int cnt = 0; + + /* Change the cnext list to include all non-new orphans */ + last = &c->orph_cnext; + list_for_each_entry(orphan, &c->orph_list, list) { + if (orphan->new) + continue; + orphan->cmt = 1; + *last = orphan; + last = &orphan->cnext; + cnt += 1; + } + *last = NULL; + ubifs_assert(cnt == c->tot_orphans - c->new_orphans); + c->cmt_orphans = cnt; + c->ohead_lnum = c->orph_first; + c->ohead_offs = 0; + } else { + /* + * We limit the number of orphans so that this should + * never happen. + */ + ubifs_err("out of space in orphan area"); + err = -EINVAL; + } + spin_unlock(&c->orphan_lock); + return err; +} + +/** + * commit_orphans - commit orphans. + * @c: UBIFS file-system description object + * + * This function commits orphans to flash. On success, %0 is returned, + * otherwise a negative error code is returned. + */ +static int commit_orphans(struct ubifs_info *c) +{ + int avail, atomic = 0, err; + + ubifs_assert(c->cmt_orphans > 0); + avail = avail_orphs(c); + if (avail < c->cmt_orphans) { + /* Not enough space to write new orphans, so consolidate */ + err = consolidate(c); + if (err) + return err; + atomic = 1; + } + err = write_orph_nodes(c, atomic); + return err; +} + +/** + * erase_deleted - erase the orphans marked for deletion. + * @c: UBIFS file-system description object + * + * During commit, the orphans being committed cannot be deleted, so they are + * marked for deletion and deleted by this function. Also, the recovery + * adds killed orphans to the deletion list, and therefore they are deleted + * here too. + */ +static void erase_deleted(struct ubifs_info *c) +{ + struct ubifs_orphan *orphan, *dnext; + + spin_lock(&c->orphan_lock); + dnext = c->orph_dnext; + while (dnext) { + orphan = dnext; + dnext = orphan->dnext; + ubifs_assert(!orphan->new); + ubifs_assert(orphan->del); + rb_erase(&orphan->rb, &c->orph_tree); + list_del(&orphan->list); + c->tot_orphans -= 1; + dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum); + kfree(orphan); + } + c->orph_dnext = NULL; + spin_unlock(&c->orphan_lock); +} + +/** + * ubifs_orphan_end_commit - end commit of orphans. + * @c: UBIFS file-system description object + * + * End commit of orphans. + */ +int ubifs_orphan_end_commit(struct ubifs_info *c) +{ + int err; + + if (c->cmt_orphans != 0) { + err = commit_orphans(c); + if (err) + return err; + } + erase_deleted(c); + err = dbg_check_orphans(c); + return err; +} + +/** * ubifs_clear_orphans - erase all LEBs used for orphans. * @c: UBIFS file-system description object * @@ -128,6 +528,7 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) rb_link_node(&orphan->rb, parent, p); rb_insert_color(&orphan->rb, &c->orph_tree); list_add_tail(&orphan->list, &c->orph_list); + orphan->del = 1; orphan->dnext = c->orph_dnext; c->orph_dnext = orphan; dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, @@ -159,9 +560,9 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, list_for_each_entry(snod, &sleb->nodes, list) { if (snod->type != UBIFS_ORPH_NODE) { - ubifs_err("invalid node type %d in orphan area at " - "%d:%d", snod->type, sleb->lnum, snod->offs); - dbg_dump_node(c, snod->node); + ubifs_err("invalid node type %d in orphan area at %d:%d", + snod->type, sleb->lnum, snod->offs); + ubifs_dump_node(c, snod->node); return -EINVAL; } @@ -186,10 +587,9 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, * number. That makes this orphan node, out of date. */ if (!first) { - ubifs_err("out of order commit number %llu in " - "orphan node at %d:%d", + ubifs_err("out of order commit number %llu in orphan node at %d:%d", cmt_no, sleb->lnum, snod->offs); - dbg_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node); return -EINVAL; } dbg_rcvry("out of date LEB %d", sleb->lnum); @@ -262,9 +662,11 @@ static int kill_orphans(struct ubifs_info *c) struct ubifs_scan_leb *sleb; dbg_rcvry("LEB %d", lnum); - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) { - sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); + if (PTR_ERR(sleb) == -EUCLEAN) + sleb = ubifs_recover_leb(c, lnum, 0, + c->sbuf, -1); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; @@ -314,3 +716,232 @@ int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) return err; } + +/* + * Everything below is related to debugging. + */ + +struct check_orphan { + struct rb_node rb; + ino_t inum; +}; + +struct check_info { + unsigned long last_ino; + unsigned long tot_inos; + unsigned long missing; + unsigned long long leaf_cnt; + struct ubifs_ino_node *node; + struct rb_root root; +}; + +static int dbg_find_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *o; + struct rb_node *p; + + spin_lock(&c->orphan_lock); + p = c->orph_tree.rb_node; + while (p) { + o = rb_entry(p, struct ubifs_orphan, rb); + if (inum < o->inum) + p = p->rb_left; + else if (inum > o->inum) + p = p->rb_right; + else { + spin_unlock(&c->orphan_lock); + return 1; + } + } + spin_unlock(&c->orphan_lock); + return 0; +} + +static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum) +{ + struct check_orphan *orphan, *o; + struct rb_node **p, *parent = NULL; + + orphan = kzalloc(sizeof(struct check_orphan), GFP_NOFS); + if (!orphan) + return -ENOMEM; + orphan->inum = inum; + + p = &root->rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct check_orphan, rb); + if (inum < o->inum) + p = &(*p)->rb_left; + else if (inum > o->inum) + p = &(*p)->rb_right; + else { + kfree(orphan); + return 0; + } + } + rb_link_node(&orphan->rb, parent, p); + rb_insert_color(&orphan->rb, root); + return 0; +} + +static int dbg_find_check_orphan(struct rb_root *root, ino_t inum) +{ + struct check_orphan *o; + struct rb_node *p; + + p = root->rb_node; + while (p) { + o = rb_entry(p, struct check_orphan, rb); + if (inum < o->inum) + p = p->rb_left; + else if (inum > o->inum) + p = p->rb_right; + else + return 1; + } + return 0; +} + +static void dbg_free_check_tree(struct rb_root *root) +{ + struct check_orphan *o, *n; + + rbtree_postorder_for_each_entry_safe(o, n, root, rb) + kfree(o); +} + +static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *priv) +{ + struct check_info *ci = priv; + ino_t inum; + int err; + + inum = key_inum(c, &zbr->key); + if (inum != ci->last_ino) { + /* Lowest node type is the inode node, so it comes first */ + if (key_type(c, &zbr->key) != UBIFS_INO_KEY) + ubifs_err("found orphan node ino %lu, type %d", + (unsigned long)inum, key_type(c, &zbr->key)); + ci->last_ino = inum; + ci->tot_inos += 1; + err = ubifs_tnc_read_node(c, zbr, ci->node); + if (err) { + ubifs_err("node read failed, error %d", err); + return err; + } + if (ci->node->nlink == 0) + /* Must be recorded as an orphan */ + if (!dbg_find_check_orphan(&ci->root, inum) && + !dbg_find_orphan(c, inum)) { + ubifs_err("missing orphan, ino %lu", + (unsigned long)inum); + ci->missing += 1; + } + } + ci->leaf_cnt += 1; + return 0; +} + +static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) +{ + struct ubifs_scan_node *snod; + struct ubifs_orph_node *orph; + ino_t inum; + int i, n, err; + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); + if (snod->type != UBIFS_ORPH_NODE) + continue; + orph = snod->node; + n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; + for (i = 0; i < n; i++) { + inum = le64_to_cpu(orph->inos[i]); + err = dbg_ins_check_orphan(&ci->root, inum); + if (err) + return err; + } + } + return 0; +} + +static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) +{ + int lnum, err = 0; + void *buf; + + /* Check no-orphans flag and skip this if no orphans */ + if (c->no_orphs) + return 0; + + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) { + ubifs_err("cannot allocate memory to check orphans"); + return 0; + } + + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + struct ubifs_scan_leb *sleb; + + sleb = ubifs_scan(c, lnum, 0, buf, 0); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; + } + + err = dbg_read_orphans(ci, sleb); + ubifs_scan_destroy(sleb); + if (err) + break; + } + + vfree(buf); + return err; +} + +static int dbg_check_orphans(struct ubifs_info *c) +{ + struct check_info ci; + int err; + + if (!dbg_is_chk_orph(c)) + return 0; + + ci.last_ino = 0; + ci.tot_inos = 0; + ci.missing = 0; + ci.leaf_cnt = 0; + ci.root = RB_ROOT; + ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); + if (!ci.node) { + ubifs_err("out of memory"); + return -ENOMEM; + } + + err = dbg_scan_orphans(c, &ci); + if (err) + goto out; + + err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci); + if (err) { + ubifs_err("cannot scan TNC, error %d", err); + goto out; + } + + if (ci.missing) { + ubifs_err("%lu missing orphan(s)", ci.missing); + err = -EINVAL; + goto out; + } + + dbg_cmt("last inode number is %lu", ci.last_ino); + dbg_cmt("total number of inodes is %lu", ci.tot_inos); + dbg_cmt("total number of leaf nodes is %llu", ci.leaf_cnt); + +out: + dbg_free_check_tree(&ci.root); + kfree(ci.node); + return err; +} diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 7444650..f54a440 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) @@ -23,13 +12,37 @@ /* * This file implements functions needed to recover from unclean un-mounts. * When UBIFS is mounted, it checks a flag on the master node to determine if - * an un-mount was completed sucessfully. If not, the process of mounting - * incorparates additional checking and fixing of on-flash data structures. + * an un-mount was completed successfully. If not, the process of mounting + * incorporates additional checking and fixing of on-flash data structures. * UBIFS always cleans away all remnants of an unclean un-mount, so that * errors do not accumulate. However UBIFS defers recovery if it is mounted * read-only, and the flash is not modified in that case. + * + * The general UBIFS approach to the recovery is that it recovers from + * corruptions which could be caused by power cuts, but it refuses to recover + * from corruption caused by other reasons. And UBIFS tries to distinguish + * between these 2 reasons of corruptions and silently recover in the former + * case and loudly complain in the latter case. + * + * UBIFS writes only to erased LEBs, so it writes only to the flash space + * containing only 0xFFs. UBIFS also always writes strictly from the beginning + * of the LEB to the end. And UBIFS assumes that the underlying flash media + * writes in @c->max_write_size bytes at a time. + * + * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min. + * I/O unit corresponding to offset X to contain corrupted data, all the + * following min. I/O units have to contain empty space (all 0xFFs). If this is + * not true, the corruption cannot be the result of a power cut, and UBIFS + * refuses to mount. */ +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/crc32.h> +#include <linux/slab.h> +#else +#include <linux/err.h> +#endif #include "ubifs.h" /** @@ -52,6 +65,25 @@ static int is_empty(void *buf, int len) } /** + * first_non_ff - find offset of the first non-0xff byte. + * @buf: buffer to search in + * @len: length of buffer + * + * This function returns offset of the first non-0xff byte in @buf or %-1 if + * the buffer contains only 0xff bytes. + */ +static int first_non_ff(void *buf, int len) +{ + uint8_t *p = buf; + int i; + + for (i = 0; i < len; i++) + if (*p++ != 0xff) + return i; + return -1; +} + +/** * get_master_node - get the last valid master node allowing for corruption. * @c: UBIFS file-system description object * @lnum: LEB number @@ -79,7 +111,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, if (!sbuf) return -ENOMEM; - err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); + err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0); if (err && err != -EBADMSG) goto out_free; @@ -175,10 +207,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c, mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); - err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum, mst, sz); if (err) goto out; - err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum + 1, mst, sz); if (err) goto out; out: @@ -236,7 +268,8 @@ int ubifs_recover_master_node(struct ubifs_info *c) if (cor1) goto out_err; mst = mst1; - } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { + } else if (offs1 == 0 && + c->leb_size - offs2 - sz < sz) { /* 1st LEB was unmapped and written, 2nd not */ if (cor1) goto out_err; @@ -266,12 +299,12 @@ int ubifs_recover_master_node(struct ubifs_info *c) mst = mst2; } - dbg_rcvry("recovered master node from LEB %d", + ubifs_msg("recovered master node from LEB %d", (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); - if ((c->vfs_sb->s_flags & MS_RDONLY)) { + if (c->ro_mount) { /* Read-only mode. Keep a copy for switching to rw mode */ c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); if (!c->rcvrd_mst_node) { @@ -279,6 +312,40 @@ int ubifs_recover_master_node(struct ubifs_info *c) goto out_free; } memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); + + /* + * We had to recover the master node, which means there was an + * unclean reboot. However, it is possible that the master node + * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set. + * E.g., consider the following chain of events: + * + * 1. UBIFS was cleanly unmounted, so the master node is clean + * 2. UBIFS is being mounted R/W and starts changing the master + * node in the first (%UBIFS_MST_LNUM). A power cut happens, + * so this LEB ends up with some amount of garbage at the + * end. + * 3. UBIFS is being mounted R/O. We reach this place and + * recover the master node from the second LEB + * (%UBIFS_MST_LNUM + 1). But we cannot update the media + * because we are being mounted R/O. We have to defer the + * operation. + * 4. However, this master node (@c->mst_node) is marked as + * clean (since the step 1). And if we just return, the + * mount code will be confused and won't recover the master + * node when it is re-mounter R/W later. + * + * Thus, to force the recovery by marking the master node as + * dirty. + */ + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); +#ifndef __UBOOT__ + } else { + /* Write the recovered master node */ + c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; + err = write_rcvrd_mst_node(c, c->mst_node); + if (err) + goto out_free; +#endif } vfree(buf2); @@ -291,12 +358,12 @@ out_err: out_free: ubifs_err("failed to recover master node"); if (mst1) { - dbg_err("dumping first master node"); - dbg_dump_node(c, mst1); + ubifs_err("dumping first master node"); + ubifs_dump_node(c, mst1); } if (mst2) { - dbg_err("dumping second master node"); - dbg_dump_node(c, mst2); + ubifs_err("dumping second master node"); + ubifs_dump_node(c, mst2); } vfree(buf2); vfree(buf1); @@ -335,44 +402,23 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) * @offs: offset to check * * This function returns %1 if @offs was in the last write to the LEB whose data - * is in @buf, otherwise %0 is returned. The determination is made by checking - * for subsequent empty space starting from the next min_io_size boundary (or a - * bit less than the common header size if min_io_size is one). + * is in @buf, otherwise %0 is returned. The determination is made by checking + * for subsequent empty space starting from the next @c->max_write_size + * boundary. */ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) { - int empty_offs; - int check_len; + int empty_offs, check_len; uint8_t *p; - if (c->min_io_size == 1) { - check_len = c->leb_size - offs; - p = buf + check_len; - for (; check_len > 0; check_len--) - if (*--p != 0xff) - break; - /* - * 'check_len' is the size of the corruption which cannot be - * more than the size of 1 node if it was caused by an unclean - * unmount. - */ - if (check_len > UBIFS_MAX_NODE_SZ) - return 0; - return 1; - } - /* - * Round up to the next c->min_io_size boundary i.e. 'offs' is in the - * last wbuf written. After that should be empty space. + * Round up to the next @c->max_write_size boundary i.e. @offs is in + * the last wbuf written. After that should be empty space. */ - empty_offs = ALIGN(offs + 1, c->min_io_size); + empty_offs = ALIGN(offs + 1, c->max_write_size); check_len = c->leb_size - empty_offs; p = buf + empty_offs - offs; - - for (; check_len > 0; check_len--) - if (*p++ != 0xff) - return 0; - return 1; + return is_empty(p, check_len); } /** @@ -385,7 +431,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) * * This function pads up to the next min_io_size boundary (if there is one) and * sets empty space to all 0xff. @buf, @offs and @len are updated to the next - * min_io_size boundary (if there is one). + * @c->min_io_size boundary. */ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, int *offs, int *len) @@ -395,11 +441,6 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, lnum = lnum; dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); - if (c->min_io_size == 1) { - memset(*buf, 0xff, c->leb_size - *offs); - return; - } - ubifs_assert(!(*offs & 7)); empty_offs = ALIGN(*offs, c->min_io_size); pad_len = empty_offs - *offs; @@ -429,7 +470,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, int skip, dlen = le32_to_cpu(ch->len); /* Check for empty space after the corrupt node's common header */ - skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; + skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs; if (is_empty(buf + skip, len - skip)) return 1; /* @@ -441,7 +482,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, return 0; } /* Now we know the corrupt node's length we can skip over it */ - skip = ALIGN(offs + dlen, c->min_io_size) - offs; + skip = ALIGN(offs + dlen, c->max_write_size) - offs; /* After which there should be empty space */ if (is_empty(buf + skip, len - skip)) return 1; @@ -469,7 +510,7 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, endpt = snod->offs + snod->len; } - if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) { + if (c->ro_mount && !c->remounting_rw) { /* Add to recovery list */ struct ubifs_unclean_leb *ucleb; @@ -481,21 +522,55 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ucleb->lnum = lnum; ucleb->endpt = endpt; list_add_tail(&ucleb->list, &c->unclean_leb_list); +#ifndef __UBOOT__ + } else { + /* Write the fixed LEB back to flash */ + int err; + + dbg_rcvry("fixing LEB %d start %d endpt %d", + lnum, start, sleb->endpt); + if (endpt == 0) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } else { + int len = ALIGN(endpt, c->min_io_size); + + if (start) { + err = ubifs_leb_read(c, lnum, sleb->buf, 0, + start, 1); + if (err) + return err; + } + /* Pad to min_io_size */ + if (len > endpt) { + int pad_len = len - ALIGN(endpt, 8); + + if (pad_len > 0) { + void *buf = sleb->buf + len - pad_len; + + ubifs_pad(c, buf, pad_len); + } + } + err = ubifs_leb_change(c, lnum, sleb->buf, len); + if (err) + return err; + } +#endif } return 0; } /** - * drop_incomplete_group - drop nodes from an incomplete group. + * drop_last_group - drop the last group of nodes. * @sleb: scanned LEB information * @offs: offset of dropped nodes is returned here * - * This function returns %1 if nodes are dropped and %0 otherwise. + * This is a helper function for 'ubifs_recover_leb()' which drops the last + * group of nodes of the scanned LEB. */ -static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) +static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) { - int dropped = 0; - while (!list_empty(&sleb->nodes)) { struct ubifs_scan_node *snod; struct ubifs_ch *ch; @@ -504,15 +579,41 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) list); ch = snod->node; if (ch->group_type != UBIFS_IN_NODE_GROUP) - return dropped; - dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); + break; + + dbg_rcvry("dropping grouped node at %d:%d", + sleb->lnum, snod->offs); + *offs = snod->offs; + list_del(&snod->list); + kfree(snod); + sleb->nodes_cnt -= 1; + } +} + +/** + * drop_last_node - drop the last node. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * @grouped: non-zero if whole group of nodes have to be dropped + * + * This is a helper function for 'ubifs_recover_leb()' which drops the last + * node of the scanned LEB. + */ +static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) +{ + struct ubifs_scan_node *snod; + + if (!list_empty(&sleb->nodes)) { + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + + dbg_rcvry("dropping last node at %d:%d", + sleb->lnum, snod->offs); *offs = snod->offs; list_del(&snod->list); kfree(snod); sleb->nodes_cnt -= 1; - dropped = 1; } - return dropped; } /** @@ -521,33 +622,30 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) * @lnum: LEB number * @offs: offset * @sbuf: LEB-sized buffer to use - * @grouped: nodes may be grouped for recovery + * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not + * belong to any journal head) * * This function does a scan of a LEB, but caters for errors that might have * been caused by the unclean unmount from which we are attempting to recover. - * - * This function returns %0 on success and a negative error code on failure. + * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is + * found, and a negative error code in case of failure. */ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped) + int offs, void *sbuf, int jhead) { - int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; - int empty_chkd = 0, start = offs; + int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; + int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped; struct ubifs_scan_leb *sleb; void *buf = sbuf + offs; - dbg_rcvry("%d:%d", lnum, offs); + dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); sleb = ubifs_start_scan(c, lnum, offs, sbuf); if (IS_ERR(sleb)) return sleb; - if (sleb->ecc) - need_clean = 1; - + ubifs_assert(len >= 8); while (len >= 8) { - int ret; - dbg_scan("look at LEB %d:%d (%d bytes left)", lnum, offs, len); @@ -557,8 +655,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * Scan quietly until there is an error from which we cannot * recover */ - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); - + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); if (ret == SCANNED_A_NODE) { /* A valid node, and not a padding node */ struct ubifs_ch *ch = buf; @@ -571,98 +668,127 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, offs += node_len; buf += node_len; len -= node_len; - continue; - } - - if (ret > 0) { + } else if (ret > 0) { /* Padding bytes or a valid padding node */ offs += ret; buf += ret; len -= ret; - continue; - } - - if (ret == SCANNED_EMPTY_SPACE) { - if (!is_empty(buf, len)) { - if (!is_last_write(c, buf, offs)) - break; - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - } - empty_chkd = 1; + } else if (ret == SCANNED_EMPTY_SPACE || + ret == SCANNED_GARBAGE || + ret == SCANNED_A_BAD_PAD_NODE || + ret == SCANNED_A_CORRUPT_NODE) { + dbg_rcvry("found corruption (%d) at %d:%d", + ret, lnum, offs); break; + } else { + ubifs_err("unexpected return value %d", ret); + err = -EINVAL; + goto error; } + } - if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) - if (is_last_write(c, buf, offs)) { - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - empty_chkd = 1; - break; - } - - if (ret == SCANNED_A_CORRUPT_NODE) - if (no_more_nodes(c, buf, len, lnum, offs)) { - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - empty_chkd = 1; - break; - } - - if (quiet) { - /* Redo the last scan but noisily */ - quiet = 0; - continue; - } + if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { + if (!is_last_write(c, buf, offs)) + goto corrupted_rescan; + } else if (ret == SCANNED_A_CORRUPT_NODE) { + if (!no_more_nodes(c, buf, len, lnum, offs)) + goto corrupted_rescan; + } else if (!is_empty(buf, len)) { + if (!is_last_write(c, buf, offs)) { + int corruption = first_non_ff(buf, len); - switch (ret) { - case SCANNED_GARBAGE: - dbg_err("garbage"); - goto corrupted; - case SCANNED_A_CORRUPT_NODE: - case SCANNED_A_BAD_PAD_NODE: - dbg_err("bad node"); - goto corrupted; - default: - dbg_err("unknown"); + /* + * See header comment for this file for more + * explanations about the reasons we have this check. + */ + ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d", + lnum, offs, corruption); + /* Make sure we dump interesting non-0xFF data */ + offs += corruption; + buf += corruption; goto corrupted; } } - if (!empty_chkd && !is_empty(buf, len)) { - if (is_last_write(c, buf, offs)) { - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - } else { - ubifs_err("corrupt empty space at LEB %d:%d", - lnum, offs); - goto corrupted; - } - } + min_io_unit = round_down(offs, c->min_io_size); + if (grouped) + /* + * If nodes are grouped, always drop the incomplete group at + * the end. + */ + drop_last_group(sleb, &offs); - /* Drop nodes from incomplete group */ - if (grouped && drop_incomplete_group(sleb, &offs)) { - buf = sbuf + offs; - len = c->leb_size - offs; - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; + if (jhead == GCHD) { + /* + * If this LEB belongs to the GC head then while we are in the + * middle of the same min. I/O unit keep dropping nodes. So + * basically, what we want is to make sure that the last min. + * I/O unit where we saw the corruption is dropped completely + * with all the uncorrupted nodes which may possibly sit there. + * + * In other words, let's name the min. I/O unit where the + * corruption starts B, and the previous min. I/O unit A. The + * below code tries to deal with a situation when half of B + * contains valid nodes or the end of a valid node, and the + * second half of B contains corrupted data or garbage. This + * means that UBIFS had been writing to B just before the power + * cut happened. I do not know how realistic is this scenario + * that half of the min. I/O unit had been written successfully + * and the other half not, but this is possible in our 'failure + * mode emulation' infrastructure at least. + * + * So what is the problem, why we need to drop those nodes? Why + * can't we just clean-up the second half of B by putting a + * padding node there? We can, and this works fine with one + * exception which was reproduced with power cut emulation + * testing and happens extremely rarely. + * + * Imagine the file-system is full, we run GC which starts + * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is + * the current GC head LEB). The @c->gc_lnum is -1, which means + * that GC will retain LEB X and will try to continue. Imagine + * that LEB X is currently the dirtiest LEB, and the amount of + * used space in LEB Y is exactly the same as amount of free + * space in LEB X. + * + * And a power cut happens when nodes are moved from LEB X to + * LEB Y. We are here trying to recover LEB Y which is the GC + * head LEB. We find the min. I/O unit B as described above. + * Then we clean-up LEB Y by padding min. I/O unit. And later + * 'ubifs_rcvry_gc_commit()' function fails, because it cannot + * find a dirty LEB which could be GC'd into LEB Y! Even LEB X + * does not match because the amount of valid nodes there does + * not fit the free space in LEB Y any more! And this is + * because of the padding node which we added to LEB Y. The + * user-visible effect of this which I once observed and + * analysed is that we cannot mount the file-system with + * -ENOSPC error. + * + * So obviously, to make sure that situation does not happen we + * should free min. I/O unit B in LEB Y completely and the last + * used min. I/O unit in LEB Y should be A. This is basically + * what the below code tries to do. + */ + while (offs > min_io_unit) + drop_last_node(sleb, &offs); } - if (offs % c->min_io_size) { - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - } + buf = sbuf + offs; + len = c->leb_size - offs; + clean_buf(c, &buf, lnum, &offs, &len); ubifs_end_scan(c, sleb, lnum, offs); - if (need_clean) { - err = fix_unclean_leb(c, sleb, start); - if (err) - goto error; - } + err = fix_unclean_leb(c, sleb, start); + if (err) + goto error; return sleb; +corrupted_rescan: + /* Re-scan the corrupted data with verbose messages */ + ubifs_err("corruption %d", ret); + ubifs_scan_a_node(c, buf, len, lnum, offs, 1); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); err = -EUCLEAN; @@ -693,22 +819,23 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, return -ENOMEM; if (c->leb_size - offs < UBIFS_CS_NODE_SZ) goto out_err; - err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); + err = ubifs_leb_read(c, lnum, (void *)cs_node, offs, + UBIFS_CS_NODE_SZ, 0); if (err && err != -EBADMSG) goto out_free; ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); if (ret != SCANNED_A_NODE) { - dbg_err("Not a valid node"); + ubifs_err("Not a valid node"); goto out_err; } if (cs_node->ch.node_type != UBIFS_CS_NODE) { - dbg_err("Node a CS node, type is %d", cs_node->ch.node_type); + ubifs_err("Node a CS node, type is %d", cs_node->ch.node_type); goto out_err; } if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { - dbg_err("CS node cmt_no %llu != current cmt_no %llu", - (unsigned long long)le64_to_cpu(cs_node->cmt_no), - c->cmt_no); + ubifs_err("CS node cmt_no %llu != current cmt_no %llu", + (unsigned long long)le64_to_cpu(cs_node->cmt_no), + c->cmt_no); goto out_err; } *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum); @@ -732,7 +859,8 @@ out_free: * @sbuf: LEB-sized buffer to use * * This function does a scan of a LEB, but caters for errors that might have - * been caused by the unclean unmount from which we are attempting to recover. + * been caused by unclean reboots from which we are attempting to recover + * (assume that only the last log LEB can be corrupted by an unclean reboot). * * This function returns %0 on success and a negative error code on failure. */ @@ -751,7 +879,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, * We can only recover at the end of the log, so check that the * next log LEB is empty or out of date. */ - sleb = ubifs_scan(c, next_lnum, 0, sbuf); + sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0); if (IS_ERR(sleb)) return sleb; if (sleb->nodes_cnt) { @@ -770,15 +898,15 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, } } if (snod->sqnum > cs_sqnum) { - ubifs_err("unrecoverable log corruption " - "in LEB %d", lnum); + ubifs_err("unrecoverable log corruption in LEB %d", + lnum); ubifs_scan_destroy(sleb); return ERR_PTR(-EUCLEAN); } } ubifs_scan_destroy(sleb); } - return ubifs_recover_leb(c, lnum, offs, sbuf, 0); + return ubifs_recover_leb(c, lnum, offs, sbuf, -1); } /** @@ -792,15 +920,10 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, * * This function returns %0 on success and a negative error code on failure. */ -static int recover_head(const struct ubifs_info *c, int lnum, int offs, - void *sbuf) +static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) { - int len, err, need_clean = 0; + int len = c->max_write_size, err; - if (c->min_io_size > 1) - len = c->min_io_size; - else - len = 512; if (offs + len > c->leb_size) len = c->leb_size - offs; @@ -808,27 +931,15 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, return 0; /* Read at the head location and check it is empty flash */ - err = ubi_read(c->ubi, lnum, sbuf, offs, len); - if (err) - need_clean = 1; - else { - uint8_t *p = sbuf; - - while (len--) - if (*p++ != 0xff) { - need_clean = 1; - break; - } - } - - if (need_clean) { + err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1); + if (err || !is_empty(sbuf, len)) { dbg_rcvry("cleaning head at %d:%d", lnum, offs); if (offs == 0) return ubifs_leb_unmap(c, lnum); - err = ubi_read(c->ubi, lnum, sbuf, 0, offs); + err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1); if (err) return err; - return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); + return ubifs_leb_change(c, lnum, sbuf, offs); } return 0; @@ -851,11 +962,11 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, * * This function returns %0 on success and a negative error code on failure. */ -int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) +int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) { int err; - ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); + ubifs_assert(!c->ro_mount || c->remounting_rw); dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); @@ -871,7 +982,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) } /** - * clean_an_unclean_leb - read and write a LEB to remove corruption. + * clean_an_unclean_leb - read and write a LEB to remove corruption. * @c: UBIFS file-system description object * @ucleb: unclean LEB information * @sbuf: LEB-sized buffer to use @@ -882,7 +993,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) * * This function returns %0 on success and a negative error code on failure. */ -static int clean_an_unclean_leb(const struct ubifs_info *c, +static int clean_an_unclean_leb(struct ubifs_info *c, struct ubifs_unclean_leb *ucleb, void *sbuf) { int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; @@ -898,7 +1009,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, return 0; } - err = ubi_read(c->ubi, lnum, buf, offs, len); + err = ubifs_leb_read(c, lnum, buf, offs, len, 0); if (err && err != -EBADMSG) return err; @@ -958,7 +1069,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, } /* Write back the LEB atomically */ - err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); + err = ubifs_leb_change(c, lnum, sbuf, len); if (err) return err; @@ -978,7 +1089,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, * * This function returns %0 on success and a negative error code on failure. */ -int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) +int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf) { dbg_rcvry("recovery"); while (!list_empty(&c->unclean_leb_list)) { @@ -996,6 +1107,140 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) return 0; } +#ifndef __UBOOT__ +/** + * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit. + * @c: UBIFS file-system description object + * + * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty + * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns + * zero in case of success and a negative error code in case of failure. + */ +static int grab_empty_leb(struct ubifs_info *c) +{ + int lnum, err; + + /* + * Note, it is very important to first search for an empty LEB and then + * run the commit, not vice-versa. The reason is that there might be + * only one empty LEB at the moment, the one which has been the + * @c->gc_lnum just before the power cut happened. During the regular + * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no + * one but GC can grab it. But at this moment this single empty LEB is + * not marked as taken, so if we run commit - what happens? Right, the + * commit will grab it and write the index there. Remember that the + * index always expands as long as there is free space, and it only + * starts consolidating when we run out of space. + * + * IOW, if we run commit now, we might not be able to find a free LEB + * after this. + */ + lnum = ubifs_find_free_leb_for_idx(c); + if (lnum < 0) { + ubifs_err("could not find an empty LEB"); + ubifs_dump_lprops(c); + ubifs_dump_budg(c, &c->bi); + return lnum; + } + + /* Reset the index flag */ + err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_INDEX, 0); + if (err) + return err; + + c->gc_lnum = lnum; + dbg_rcvry("found empty LEB %d, run commit", lnum); + + return ubifs_run_commit(c); +} + +/** + * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. + * @c: UBIFS file-system description object + * + * Out-of-place garbage collection requires always one empty LEB with which to + * start garbage collection. The LEB number is recorded in c->gc_lnum and is + * written to the master node on unmounting. In the case of an unclean unmount + * the value of gc_lnum recorded in the master node is out of date and cannot + * be used. Instead, recovery must allocate an empty LEB for this purpose. + * However, there may not be enough empty space, in which case it must be + * possible to GC the dirtiest LEB into the GC head LEB. + * + * This function also runs the commit which causes the TNC updates from + * size-recovery and orphans to be written to the flash. That is important to + * ensure correct replay order for subsequent mounts. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_rcvry_gc_commit(struct ubifs_info *c) +{ + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + struct ubifs_lprops lp; + int err; + + dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs); + + c->gc_lnum = -1; + if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) + return grab_empty_leb(c); + + err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); + if (err) { + if (err != -ENOSPC) + return err; + + dbg_rcvry("could not find a dirty LEB"); + return grab_empty_leb(c); + } + + ubifs_assert(!(lp.flags & LPROPS_INDEX)); + ubifs_assert(lp.free + lp.dirty >= wbuf->offs); + + /* + * We run the commit before garbage collection otherwise subsequent + * mounts will see the GC and orphan deletion in a different order. + */ + dbg_rcvry("committing"); + err = ubifs_run_commit(c); + if (err) + return err; + + dbg_rcvry("GC'ing LEB %d", lp.lnum); + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + err = ubifs_garbage_collect_leb(c, &lp); + if (err >= 0) { + int err2 = ubifs_wbuf_sync_nolock(wbuf); + + if (err2) + err = err2; + } + mutex_unlock(&wbuf->io_mutex); + if (err < 0) { + ubifs_err("GC failed, error %d", err); + if (err == -EAGAIN) + err = -EINVAL; + return err; + } + + ubifs_assert(err == LEB_RETAINED); + if (err != LEB_RETAINED) + return -EINVAL; + + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; + + dbg_rcvry("allocated LEB %d for GC", lp.lnum); + return 0; +} +#else +int ubifs_rcvry_gc_commit(struct ubifs_info *c) +{ + return 0; +} +#endif + /** * struct size_entry - inode size information for recovery. * @rb: link in the RB-tree of sizes @@ -1090,6 +1335,23 @@ static void remove_ino(struct ubifs_info *c, ino_t inum) } /** + * ubifs_destroy_size_tree - free resources related to the size tree. + * @c: UBIFS file-system description object + */ +void ubifs_destroy_size_tree(struct ubifs_info *c) +{ + struct size_entry *e, *n; + + rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) { + if (e->inode) + iput(e->inode); + kfree(e); + } + + c->size_tree = RB_ROOT; +} + +/** * ubifs_recover_size_accum - accumulate inode sizes for recovery. * @c: UBIFS file-system description object * @key: node key @@ -1157,6 +1419,64 @@ int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, return 0; } +#ifndef __UBOOT__ +/** + * fix_size_in_place - fix inode size in place on flash. + * @c: UBIFS file-system description object + * @e: inode size information for recovery + */ +static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) +{ + struct ubifs_ino_node *ino = c->sbuf; + unsigned char *p; + union ubifs_key key; + int err, lnum, offs, len; + loff_t i_size; + uint32_t crc; + + /* Locate the inode node LEB number and offset */ + ino_key_init(c, &key, e->inum); + err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs); + if (err) + goto out; + /* + * If the size recorded on the inode node is greater than the size that + * was calculated from nodes in the journal then don't change the inode. + */ + i_size = le64_to_cpu(ino->size); + if (i_size >= e->d_size) + return 0; + /* Read the LEB */ + err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1); + if (err) + goto out; + /* Change the size field and recalculate the CRC */ + ino = c->sbuf + offs; + ino->size = cpu_to_le64(e->d_size); + len = le32_to_cpu(ino->ch.len); + crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8); + ino->ch.crc = cpu_to_le32(crc); + /* Work out where data in the LEB ends and free space begins */ + p = c->sbuf; + len = c->leb_size - 1; + while (p[len] == 0xff) + len -= 1; + len = ALIGN(len + 1, c->min_io_size); + /* Atomically write the fixed LEB back again */ + err = ubifs_leb_change(c, lnum, c->sbuf, len); + if (err) + goto out; + dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", + (unsigned long)e->inum, lnum, offs, i_size, e->d_size); + return 0; + +out: + ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d", + (unsigned long)e->inum, e->i_size, e->d_size, err); + return err; +} +#endif + /** * ubifs_recover_size - recover inode size. * @c: UBIFS file-system description object @@ -1196,30 +1516,48 @@ int ubifs_recover_size(struct ubifs_info *c) e->i_size = le64_to_cpu(ino->size); } } + if (e->exists && e->i_size < e->d_size) { - if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { + if (c->ro_mount) { /* Fix the inode size and pin it in memory */ struct inode *inode; + struct ubifs_inode *ui; + + ubifs_assert(!e->inode); inode = ubifs_iget(c->vfs_sb, e->inum); if (IS_ERR(inode)) return PTR_ERR(inode); + + ui = ubifs_inode(inode); if (inode->i_size < e->d_size) { dbg_rcvry("ino %lu size %lld -> %lld", (unsigned long)e->inum, - e->d_size, inode->i_size); + inode->i_size, e->d_size); inode->i_size = e->d_size; - ubifs_inode(inode)->ui_size = e->d_size; + ui->ui_size = e->d_size; + ui->synced_i_size = e->d_size; e->inode = inode; this = rb_next(this); continue; } iput(inode); +#ifndef __UBOOT__ + } else { + /* Fix the size in place */ + err = fix_size_in_place(c, e); + if (err) + return err; + if (e->inode) + iput(e->inode); +#endif } } + this = rb_next(this); rb_erase(&e->rb, &c->size_tree); kfree(e); } + return 0; } diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index da33a14..6393b15 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) @@ -32,44 +21,38 @@ * larger is the journal, the more memory its index may consume. */ +#define __UBOOT__ +#ifdef __UBOOT__ +#include <linux/compat.h> +#include <linux/err.h> +#endif #include "ubifs.h" - -/* - * Replay flags. - * - * REPLAY_DELETION: node was deleted - * REPLAY_REF: node is a reference node - */ -enum { - REPLAY_DELETION = 1, - REPLAY_REF = 2, -}; +#include <linux/list_sort.h> /** - * struct replay_entry - replay tree entry. + * struct replay_entry - replay list entry. * @lnum: logical eraseblock number of the node * @offs: node offset * @len: node length + * @deletion: non-zero if this entry corresponds to a node deletion * @sqnum: node sequence number - * @flags: replay flags - * @rb: links the replay tree + * @list: links the replay list * @key: node key * @nm: directory entry name * @old_size: truncation old size * @new_size: truncation new size - * @free: amount of free space in a bud - * @dirty: amount of dirty space in a bud from padding and deletion nodes * - * UBIFS journal replay must compare node sequence numbers, which means it must - * build a tree of node information to insert into the TNC. + * The replay process first scans all buds and builds the replay list, then + * sorts the replay list in nodes sequence number order, and then inserts all + * the replay entries to the TNC. */ struct replay_entry { int lnum; int offs; int len; + unsigned int deletion:1; unsigned long long sqnum; - int flags; - struct rb_node rb; + struct list_head list; union ubifs_key key; union { struct qstr nm; @@ -77,10 +60,6 @@ struct replay_entry { loff_t old_size; loff_t new_size; }; - struct { - int free; - int dirty; - }; }; }; @@ -88,83 +67,117 @@ struct replay_entry { * struct bud_entry - entry in the list of buds to replay. * @list: next bud in the list * @bud: bud description object - * @free: free bytes in the bud * @sqnum: reference node sequence number + * @free: free bytes in the bud + * @dirty: dirty bytes in the bud */ struct bud_entry { struct list_head list; struct ubifs_bud *bud; - int free; unsigned long long sqnum; + int free; + int dirty; }; +#ifndef __UBOOT__ /** * set_bud_lprops - set free and dirty space used by a bud. * @c: UBIFS file-system description object - * @r: replay entry of bud + * @b: bud entry which describes the bud + * + * This function makes sure the LEB properties of bud @b are set correctly + * after the replay. Returns zero in case of success and a negative error code + * in case of failure. */ -static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) +static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) { const struct ubifs_lprops *lp; int err = 0, dirty; ubifs_get_lprops(c); - lp = ubifs_lpt_lookup_dirty(c, r->lnum); + lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); if (IS_ERR(lp)) { err = PTR_ERR(lp); goto out; } dirty = lp->dirty; - if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { + if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { /* * The LEB was added to the journal with a starting offset of * zero which means the LEB must have been empty. The LEB - * property values should be lp->free == c->leb_size and - * lp->dirty == 0, but that is not the case. The reason is that - * the LEB was garbage collected. The garbage collector resets - * the free and dirty space without recording it anywhere except - * lprops, so if there is not a commit then lprops does not have - * that information next time the file system is mounted. + * property values should be @lp->free == @c->leb_size and + * @lp->dirty == 0, but that is not the case. The reason is that + * the LEB had been garbage collected before it became the bud, + * and there was not commit inbetween. The garbage collector + * resets the free and dirty space without recording it + * anywhere except lprops, so if there was no commit then + * lprops does not have that information. * * We do not need to adjust free space because the scan has told * us the exact value which is recorded in the replay entry as - * r->free. + * @b->free. * * However we do need to subtract from the dirty space the * amount of space that the garbage collector reclaimed, which * is the whole LEB minus the amount of space that was free. */ - dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, + dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, lp->free, lp->dirty); - dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, + dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, lp->free, lp->dirty); dirty -= c->leb_size - lp->free; /* * If the replay order was perfect the dirty space would now be - * zero. The order is not perfect because the the journal heads + * zero. The order is not perfect because the journal heads * race with each other. This is not a problem but is does mean * that the dirty space may temporarily exceed c->leb_size * during the replay. */ if (dirty != 0) - dbg_msg("LEB %d lp: %d free %d dirty " - "replay: %d free %d dirty", r->lnum, lp->free, - lp->dirty, r->free, r->dirty); + dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", + b->bud->lnum, lp->free, lp->dirty, b->free, + b->dirty); } - lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, + lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, lp->flags | LPROPS_TAKEN, 0); if (IS_ERR(lp)) { err = PTR_ERR(lp); goto out; } + + /* Make sure the journal head points to the latest bud */ + err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, + b->bud->lnum, c->leb_size - b->free); + out: ubifs_release_lprops(c); return err; } /** + * set_buds_lprops - set free and dirty space for all replayed buds. + * @c: UBIFS file-system description object + * + * This function sets LEB properties for all replayed buds. Returns zero in + * case of success and a negative error code in case of failure. + */ +static int set_buds_lprops(struct ubifs_info *c) +{ + struct bud_entry *b; + int err; + + list_for_each_entry(b, &c->replay_buds, list) { + err = set_bud_lprops(c, b); + if (err) + return err; + } + + return 0; +} + +/** * trun_remove_range - apply a replay entry for a truncation to the TNC. * @c: UBIFS file-system description object * @r: replay entry of truncation @@ -200,24 +213,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) */ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) { - int err, deletion = ((r->flags & REPLAY_DELETION) != 0); + int err; - dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, - r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); + dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", + r->lnum, r->offs, r->len, r->deletion, r->sqnum); /* Set c->replay_sqnum to help deal with dangling branches. */ c->replay_sqnum = r->sqnum; - if (r->flags & REPLAY_REF) - err = set_bud_lprops(c, r); - else if (is_hash_key(c, &r->key)) { - if (deletion) + if (is_hash_key(c, &r->key)) { + if (r->deletion) err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); else err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, r->len, &r->nm); } else { - if (deletion) + if (r->deletion) switch (key_type(c, &r->key)) { case UBIFS_INO_KEY: { @@ -240,7 +251,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) return err; if (c->need_recovery) - err = ubifs_recover_size_accum(c, &r->key, deletion, + err = ubifs_recover_size_accum(c, &r->key, r->deletion, r->new_size); } @@ -248,68 +259,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) } /** - * destroy_replay_tree - destroy the replay. - * @c: UBIFS file-system description object + * replay_entries_cmp - compare 2 replay entries. + * @priv: UBIFS file-system description object + * @a: first replay entry + * @a: second replay entry * - * Destroy the replay tree. + * This is a comparios function for 'list_sort()' which compares 2 replay + * entries @a and @b by comparing their sequence numer. Returns %1 if @a has + * greater sequence number and %-1 otherwise. */ -static void destroy_replay_tree(struct ubifs_info *c) +static int replay_entries_cmp(void *priv, struct list_head *a, + struct list_head *b) { - struct rb_node *this = c->replay_tree.rb_node; - struct replay_entry *r; - - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - r = rb_entry(this, struct replay_entry, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &r->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } - if (is_hash_key(c, &r->key)) - kfree((void *)r->nm.name); - kfree(r); - } - c->replay_tree = RB_ROOT; + struct replay_entry *ra, *rb; + + cond_resched(); + if (a == b) + return 0; + + ra = list_entry(a, struct replay_entry, list); + rb = list_entry(b, struct replay_entry, list); + ubifs_assert(ra->sqnum != rb->sqnum); + if (ra->sqnum > rb->sqnum) + return 1; + return -1; } /** - * apply_replay_tree - apply the replay tree to the TNC. + * apply_replay_list - apply the replay list to the TNC. * @c: UBIFS file-system description object * - * Apply the replay tree. - * Returns zero in case of success and a negative error code in case of - * failure. + * Apply all entries in the replay list to the TNC. Returns zero in case of + * success and a negative error code in case of failure. */ -static int apply_replay_tree(struct ubifs_info *c) +static int apply_replay_list(struct ubifs_info *c) { - struct rb_node *this = rb_first(&c->replay_tree); + struct replay_entry *r; + int err; - while (this) { - struct replay_entry *r; - int err; + list_sort(c, &c->replay_list, &replay_entries_cmp); + list_for_each_entry(r, &c->replay_list, list) { cond_resched(); - r = rb_entry(this, struct replay_entry, rb); err = apply_replay_entry(c, r); if (err) return err; - this = rb_next(this); } + return 0; } /** - * insert_node - insert a node to the replay tree. + * destroy_replay_list - destroy the replay. + * @c: UBIFS file-system description object + * + * Destroy the replay list. + */ +static void destroy_replay_list(struct ubifs_info *c) +{ + struct replay_entry *r, *tmp; + + list_for_each_entry_safe(r, tmp, &c->replay_list, list) { + if (is_hash_key(c, &r->key)) + kfree(r->nm.name); + list_del(&r->list); + kfree(r); + } +} + +/** + * insert_node - insert a node to the replay list * @c: UBIFS file-system description object * @lnum: node logical eraseblock number * @offs: node offset @@ -321,39 +341,25 @@ static int apply_replay_tree(struct ubifs_info *c) * @old_size: truncation old size * @new_size: truncation new size * - * This function inserts a scanned non-direntry node to the replay tree. The - * replay tree is an RB-tree containing @struct replay_entry elements which are - * indexed by the sequence number. The replay tree is applied at the very end - * of the replay process. Since the tree is sorted in sequence number order, - * the older modifications are applied first. This function returns zero in - * case of success and a negative error code in case of failure. + * This function inserts a scanned non-direntry node to the replay list. The + * replay list contains @struct replay_entry elements, and we sort this list in + * sequence number order before applying it. The replay list is applied at the + * very end of the replay process. Since the list is sorted in sequence number + * order, the older modifications are applied first. This function returns zero + * in case of success and a negative error code in case of failure. */ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, union ubifs_key *key, unsigned long long sqnum, int deletion, int *used, loff_t old_size, loff_t new_size) { - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; struct replay_entry *r; + dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); + if (key_inum(c, key) >= c->highest_inum) c->highest_inum = key_inum(c, key); - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); - while (*p) { - parent = *p; - r = rb_entry(parent, struct replay_entry, rb); - if (sqnum < r->sqnum) { - p = &(*p)->rb_left; - continue; - } else if (sqnum > r->sqnum) { - p = &(*p)->rb_right; - continue; - } - ubifs_err("duplicate sqnum in replay"); - return -EINVAL; - } - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); if (!r) return -ENOMEM; @@ -363,19 +369,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, r->lnum = lnum; r->offs = offs; r->len = len; + r->deletion = !!deletion; r->sqnum = sqnum; - r->flags = (deletion ? REPLAY_DELETION : 0); + key_copy(c, key, &r->key); r->old_size = old_size; r->new_size = new_size; - key_copy(c, key, &r->key); - rb_link_node(&r->rb, parent, p); - rb_insert_color(&r->rb, &c->replay_tree); + list_add_tail(&r->list, &c->replay_list); return 0; } /** - * insert_dent - insert a directory entry node into the replay tree. + * insert_dent - insert a directory entry node into the replay list. * @c: UBIFS file-system description object * @lnum: node logical eraseblock number * @offs: node offset @@ -387,43 +392,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, * @deletion: non-zero if this is a deletion * @used: number of bytes in use in a LEB * - * This function inserts a scanned directory entry node to the replay tree. - * Returns zero in case of success and a negative error code in case of - * failure. - * - * This function is also used for extended attribute entries because they are - * implemented as directory entry nodes. + * This function inserts a scanned directory entry node or an extended + * attribute entry to the replay list. Returns zero in case of success and a + * negative error code in case of failure. */ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, union ubifs_key *key, const char *name, int nlen, unsigned long long sqnum, int deletion, int *used) { - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; struct replay_entry *r; char *nbuf; + dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); if (key_inum(c, key) >= c->highest_inum) c->highest_inum = key_inum(c, key); - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); - while (*p) { - parent = *p; - r = rb_entry(parent, struct replay_entry, rb); - if (sqnum < r->sqnum) { - p = &(*p)->rb_left; - continue; - } - if (sqnum > r->sqnum) { - p = &(*p)->rb_right; - continue; - } - ubifs_err("duplicate sqnum in replay"); - return -EINVAL; - } - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); if (!r) return -ENOMEM; + nbuf = kmalloc(nlen + 1, GFP_KERNEL); if (!nbuf) { kfree(r); @@ -435,19 +422,18 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, r->lnum = lnum; r->offs = offs; r->len = len; + r->deletion = !!deletion; r->sqnum = sqnum; + key_copy(c, key, &r->key); r->nm.len = nlen; memcpy(nbuf, name, nlen); nbuf[nlen] = '\0'; r->nm.name = nbuf; - r->flags = (deletion ? REPLAY_DELETION : 0); - key_copy(c, key, &r->key); - ubifs_assert(!*p); - rb_link_node(&r->rb, parent, p); - rb_insert_color(&r->rb, &c->replay_tree); + list_add_tail(&r->list, &c->replay_list); return 0; } +#endif /** * ubifs_validate_entry - validate directory or extended attribute entry node. @@ -466,7 +452,7 @@ int ubifs_validate_entry(struct ubifs_info *c, if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || dent->type >= UBIFS_ITYPES_CNT || nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || - strnlen((char *)dent->name, nlen) != nlen || + strnlen(dent->name, nlen) != nlen || le64_to_cpu(dent->inum) > MAX_INUM) { ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? "directory entry" : "extended attribute entry"); @@ -481,32 +467,94 @@ int ubifs_validate_entry(struct ubifs_info *c, return 0; } +#ifndef __UBOOT__ +/** + * is_last_bud - check if the bud is the last in the journal head. + * @c: UBIFS file-system description object + * @bud: bud description object + * + * This function checks if bud @bud is the last bud in its journal head. This + * information is then used by 'replay_bud()' to decide whether the bud can + * have corruptions or not. Indeed, only last buds can be corrupted by power + * cuts. Returns %1 if this is the last bud, and %0 if not. + */ +static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) +{ + struct ubifs_jhead *jh = &c->jheads[bud->jhead]; + struct ubifs_bud *next; + uint32_t data; + int err; + + if (list_is_last(&bud->list, &jh->buds_list)) + return 1; + + /* + * The following is a quirk to make sure we work correctly with UBIFS + * images used with older UBIFS. + * + * Normally, the last bud will be the last in the journal head's list + * of bud. However, there is one exception if the UBIFS image belongs + * to older UBIFS. This is fairly unlikely: one would need to use old + * UBIFS, then have a power cut exactly at the right point, and then + * try to mount this image with new UBIFS. + * + * The exception is: it is possible to have 2 buds A and B, A goes + * before B, and B is the last, bud B is contains no data, and bud A is + * corrupted at the end. The reason is that in older versions when the + * journal code switched the next bud (from A to B), it first added a + * log reference node for the new bud (B), and only after this it + * synchronized the write-buffer of current bud (A). But later this was + * changed and UBIFS started to always synchronize the write-buffer of + * the bud (A) before writing the log reference for the new bud (B). + * + * But because older UBIFS always synchronized A's write-buffer before + * writing to B, we can recognize this exceptional situation but + * checking the contents of bud B - if it is empty, then A can be + * treated as the last and we can recover it. + * + * TODO: remove this piece of code in a couple of years (today it is + * 16.05.2011). + */ + next = list_entry(bud->list.next, struct ubifs_bud, list); + if (!list_is_last(&next->list, &jh->buds_list)) + return 0; + + err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); + if (err) + return 0; + + return data == 0xFFFFFFFF; +} + /** * replay_bud - replay a bud logical eraseblock. * @c: UBIFS file-system description object - * @lnum: bud logical eraseblock number to replay - * @offs: bud start offset - * @jhead: journal head to which this bud belongs - * @free: amount of free space in the bud is returned here - * @dirty: amount of dirty space from padding and deletion nodes is returned - * here + * @b: bud entry which describes the bud * - * This function returns zero in case of success and a negative error code in - * case of failure. + * This function replays bud @bud, recovers it if needed, and adds all nodes + * from this bud to the replay list. Returns zero in case of success and a + * negative error code in case of failure. */ -static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, - int *free, int *dirty) +static int replay_bud(struct ubifs_info *c, struct bud_entry *b) { - int err = 0, used = 0; + int is_last = is_last_bud(c, b->bud); + int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; - struct ubifs_bud *bud; - dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); - if (c->need_recovery) - sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); + dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", + lnum, b->bud->jhead, offs, is_last); + + if (c->need_recovery && is_last) + /* + * Recover only last LEBs in the journal heads, because power + * cuts may cause corruptions only in these LEBs, because only + * these LEBs could possibly be written to at the power cut + * time. + */ + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); else - sleb = ubifs_scan(c, lnum, offs, c->sbuf); + sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); if (IS_ERR(sleb)) return PTR_ERR(sleb); @@ -580,7 +628,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, goto out_dump; err = insert_dent(c, lnum, snod->offs, snod->len, - &snod->key, (char *)dent->name, + &snod->key, dent->name, le16_to_cpu(dent->nlen), snod->sqnum, !le64_to_cpu(dent->inum), &used); break; @@ -620,15 +668,14 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, goto out; } - bud = ubifs_search_bud(c, lnum); - if (!bud) - BUG(); - + ubifs_assert(ubifs_search_bud(c, lnum)); ubifs_assert(sleb->endpt - offs >= used); ubifs_assert(sleb->endpt % c->min_io_size == 0); - *dirty = sleb->endpt - offs - used; - *free = c->leb_size - sleb->endpt; + b->dirty = sleb->endpt - offs - used; + b->free = c->leb_size - sleb->endpt; + dbg_mnt("bud LEB %d replied: dirty %d, free %d", + lnum, b->dirty, b->free); out: ubifs_scan_destroy(sleb); @@ -636,61 +683,12 @@ out: out_dump: ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); - dbg_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node); ubifs_scan_destroy(sleb); return -EINVAL; } /** - * insert_ref_node - insert a reference node to the replay tree. - * @c: UBIFS file-system description object - * @lnum: node logical eraseblock number - * @offs: node offset - * @sqnum: sequence number - * @free: amount of free space in bud - * @dirty: amount of dirty space from padding and deletion nodes - * - * This function inserts a reference node to the replay tree and returns zero - * in case of success or a negative error code in case of failure. - */ -static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, - unsigned long long sqnum, int free, int dirty) -{ - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; - struct replay_entry *r; - - dbg_mnt("add ref LEB %d:%d", lnum, offs); - while (*p) { - parent = *p; - r = rb_entry(parent, struct replay_entry, rb); - if (sqnum < r->sqnum) { - p = &(*p)->rb_left; - continue; - } else if (sqnum > r->sqnum) { - p = &(*p)->rb_right; - continue; - } - ubifs_err("duplicate sqnum in replay tree"); - return -EINVAL; - } - - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); - if (!r) - return -ENOMEM; - - r->lnum = lnum; - r->offs = offs; - r->sqnum = sqnum; - r->flags = REPLAY_REF; - r->free = free; - r->dirty = dirty; - - rb_link_node(&r->rb, parent, p); - rb_insert_color(&r->rb, &c->replay_tree); - return 0; -} - -/** * replay_buds - replay all buds. * @c: UBIFS file-system description object * @@ -700,17 +698,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, static int replay_buds(struct ubifs_info *c) { struct bud_entry *b; - int err, uninitialized_var(free), uninitialized_var(dirty); + int err; + unsigned long long prev_sqnum = 0; list_for_each_entry(b, &c->replay_buds, list) { - err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, - &free, &dirty); - if (err) - return err; - err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, - free, dirty); + err = replay_bud(c, b); if (err) return err; + + ubifs_assert(b->sqnum > prev_sqnum); + prev_sqnum = b->sqnum; } return 0; @@ -831,10 +828,16 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) const struct ubifs_cs_node *node; dbg_mnt("replay log LEB %d:%d", lnum, offs); - sleb = ubifs_scan(c, lnum, offs, sbuf); + sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); if (IS_ERR(sleb)) { - if (c->need_recovery) - sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); + if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) + return PTR_ERR(sleb); + /* + * Note, the below function will recover this log LEB only if + * it is the last, because unclean reboots can possibly corrupt + * only the tail of the log. + */ + sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); if (IS_ERR(sleb)) return PTR_ERR(sleb); } @@ -845,7 +848,6 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) } node = sleb->buf; - snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); if (c->cs_sqnum == 0) { /* @@ -856,16 +858,15 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) * numbers. */ if (snod->type != UBIFS_CS_NODE) { - dbg_err("first log node at LEB %d:%d is not CS node", - lnum, offs); + ubifs_err("first log node at LEB %d:%d is not CS node", + lnum, offs); goto out_dump; } if (le64_to_cpu(node->cmt_no) != c->cmt_no) { - dbg_err("first CS node at LEB %d:%d has wrong " - "commit number %llu expected %llu", - lnum, offs, - (unsigned long long)le64_to_cpu(node->cmt_no), - c->cmt_no); + ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu", + lnum, offs, + (unsigned long long)le64_to_cpu(node->cmt_no), + c->cmt_no); goto out_dump; } @@ -887,12 +888,11 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) /* Make sure the first node sits at offset zero of the LEB */ if (snod->offs != 0) { - dbg_err("first node is not at zero offset"); + ubifs_err("first node is not at zero offset"); goto out_dump; } list_for_each_entry(snod, &sleb->nodes, list) { - cond_resched(); if (snod->sqnum >= SQNUM_WATERMARK) { @@ -901,8 +901,8 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) } if (snod->sqnum < c->cs_sqnum) { - dbg_err("bad sqnum %llu, commit sqnum %llu", - snod->sqnum, c->cs_sqnum); + ubifs_err("bad sqnum %llu, commit sqnum %llu", + snod->sqnum, c->cs_sqnum); goto out_dump; } @@ -952,9 +952,9 @@ out: return err; out_dump: - ubifs_err("log error detected while replying the log at LEB %d:%d", + ubifs_err("log error detected while replaying the log at LEB %d:%d", lnum, offs + snod->offs); - dbg_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node); ubifs_scan_destroy(sleb); return -EINVAL; } @@ -1004,67 +1004,64 @@ out: */ int ubifs_replay_journal(struct ubifs_info *c) { - int err, i, lnum, offs, _free; - void *sbuf = NULL; + int err, lnum, free; BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); /* Update the status of the index head in lprops to 'taken' */ - _free = take_ihead(c); - if (_free < 0) - return _free; /* Error code */ + free = take_ihead(c); + if (free < 0) + return free; /* Error code */ - if (c->ihead_offs != c->leb_size - _free) { + if (c->ihead_offs != c->leb_size - free) { ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, c->ihead_offs); return -EINVAL; } - sbuf = vmalloc(c->leb_size); - if (!sbuf) - return -ENOMEM; - dbg_mnt("start replaying the journal"); - c->replaying = 1; - lnum = c->ltail_lnum = c->lhead_lnum; - offs = c->lhead_offs; - for (i = 0; i < c->log_lebs; i++, lnum++) { - if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { - /* - * The log is logically circular, we reached the last - * LEB, switch to the first one. - */ - lnum = UBIFS_LOG_LNUM; - offs = 0; - } - err = replay_log_leb(c, lnum, offs, sbuf); + do { + err = replay_log_leb(c, lnum, 0, c->sbuf); if (err == 1) /* We hit the end of the log */ break; if (err) goto out; - offs = 0; - } + lnum = ubifs_next_log_lnum(c, lnum); + } while (lnum != c->ltail_lnum); err = replay_buds(c); if (err) goto out; - err = apply_replay_tree(c); + err = apply_replay_list(c); if (err) goto out; + err = set_buds_lprops(c); + if (err) + goto out; + + /* + * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable + * to roughly estimate index growth. Things like @c->bi.min_idx_lebs + * depend on it. This means we have to initialize it to make sure + * budgeting works properly. + */ + c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); + c->bi.uncommitted_idx *= c->max_idx_node_sz; + ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); - dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " - "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, + dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", + c->lhead_lnum, c->lhead_offs, c->max_sqnum, (unsigned long)c->highest_inum); out: - destroy_replay_tree(c); + destroy_replay_list(c); destroy_bud_list(c); - vfree(sbuf); c->replaying = 0; return err; } +#endif diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 00c9cd3..fc0194a 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -27,6 +16,18 @@ */ #include "ubifs.h" +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/math64.h> +#else + +#include <linux/compat.h> +#include <linux/err.h> +#include <ubi_uboot.h> +#include <linux/stat.h> +#endif /* * Default journal size in logical eraseblocks as a percent of total @@ -60,6 +61,282 @@ /* Default time granularity in nanoseconds */ #define DEFAULT_TIME_GRAN 1000000000 +#ifndef __UBOOT__ +/** + * create_default_filesystem - format empty UBI volume. + * @c: UBIFS file-system description object + * + * This function creates default empty file-system. Returns zero in case of + * success and a negative error code in case of failure. + */ +static int create_default_filesystem(struct ubifs_info *c) +{ + struct ubifs_sb_node *sup; + struct ubifs_mst_node *mst; + struct ubifs_idx_node *idx; + struct ubifs_branch *br; + struct ubifs_ino_node *ino; + struct ubifs_cs_node *cs; + union ubifs_key key; + int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first; + int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; + int min_leb_cnt = UBIFS_MIN_LEB_CNT; + long long tmp64, main_bytes; + __le64 tmp_le64; + + /* Some functions called from here depend on the @c->key_len filed */ + c->key_len = UBIFS_SK_LEN; + + /* + * First of all, we have to calculate default file-system geometry - + * log size, journal size, etc. + */ + if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT) + /* We can first multiply then divide and have no overflow */ + jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100; + else + jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT; + + if (jnl_lebs < UBIFS_MIN_JNL_LEBS) + jnl_lebs = UBIFS_MIN_JNL_LEBS; + if (jnl_lebs * c->leb_size > DEFAULT_MAX_JNL) + jnl_lebs = DEFAULT_MAX_JNL / c->leb_size; + + /* + * The log should be large enough to fit reference nodes for all bud + * LEBs. Because buds do not have to start from the beginning of LEBs + * (half of the LEB may contain committed data), the log should + * generally be larger, make it twice as large. + */ + tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1; + log_lebs = tmp / c->leb_size; + /* Plus one LEB reserved for commit */ + log_lebs += 1; + if (c->leb_cnt - min_leb_cnt > 8) { + /* And some extra space to allow writes while committing */ + log_lebs += 1; + min_leb_cnt += 1; + } + + max_buds = jnl_lebs - log_lebs; + if (max_buds < UBIFS_MIN_BUD_LEBS) + max_buds = UBIFS_MIN_BUD_LEBS; + + /* + * Orphan nodes are stored in a separate area. One node can store a lot + * of orphan inode numbers, but when new orphan comes we just add a new + * orphan node. At some point the nodes are consolidated into one + * orphan node. + */ + orph_lebs = UBIFS_MIN_ORPH_LEBS; + if (c->leb_cnt - min_leb_cnt > 1) + /* + * For debugging purposes it is better to have at least 2 + * orphan LEBs, because the orphan subsystem would need to do + * consolidations and would be stressed more. + */ + orph_lebs += 1; + + main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs; + main_lebs -= orph_lebs; + + lpt_first = UBIFS_LOG_LNUM + log_lebs; + c->lsave_cnt = DEFAULT_LSAVE_CNT; + c->max_leb_cnt = c->leb_cnt; + err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs, + &big_lpt); + if (err) + return err; + + dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first, + lpt_first + lpt_lebs - 1); + + main_first = c->leb_cnt - main_lebs; + + /* Create default superblock */ + tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); + sup = kzalloc(tmp, GFP_KERNEL); + if (!sup) + return -ENOMEM; + + tmp64 = (long long)max_buds * c->leb_size; + if (big_lpt) + sup_flags |= UBIFS_FLG_BIGLPT; + + sup->ch.node_type = UBIFS_SB_NODE; + sup->key_hash = UBIFS_KEY_HASH_R5; + sup->flags = cpu_to_le32(sup_flags); + sup->min_io_size = cpu_to_le32(c->min_io_size); + sup->leb_size = cpu_to_le32(c->leb_size); + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + sup->max_leb_cnt = cpu_to_le32(c->max_leb_cnt); + sup->max_bud_bytes = cpu_to_le64(tmp64); + sup->log_lebs = cpu_to_le32(log_lebs); + sup->lpt_lebs = cpu_to_le32(lpt_lebs); + sup->orph_lebs = cpu_to_le32(orph_lebs); + sup->jhead_cnt = cpu_to_le32(DEFAULT_JHEADS_CNT); + sup->fanout = cpu_to_le32(DEFAULT_FANOUT); + sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); + sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); + sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); + if (c->mount_opts.override_compr) + sup->default_compr = cpu_to_le16(c->mount_opts.compr_type); + else + sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); + + generate_random_uuid(sup->uuid); + + main_bytes = (long long)main_lebs * c->leb_size; + tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100); + if (tmp64 > DEFAULT_MAX_RP_SIZE) + tmp64 = DEFAULT_MAX_RP_SIZE; + sup->rp_size = cpu_to_le64(tmp64); + sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION); + + err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0); + kfree(sup); + if (err) + return err; + + dbg_gen("default superblock created at LEB 0:0"); + + /* Create default master node */ + mst = kzalloc(c->mst_node_alsz, GFP_KERNEL); + if (!mst) + return -ENOMEM; + + mst->ch.node_type = UBIFS_MST_NODE; + mst->log_lnum = cpu_to_le32(UBIFS_LOG_LNUM); + mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO); + mst->cmt_no = 0; + mst->root_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); + mst->root_offs = 0; + tmp = ubifs_idx_node_sz(c, 1); + mst->root_len = cpu_to_le32(tmp); + mst->gc_lnum = cpu_to_le32(main_first + DEFAULT_GC_LEB); + mst->ihead_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); + mst->ihead_offs = cpu_to_le32(ALIGN(tmp, c->min_io_size)); + mst->index_size = cpu_to_le64(ALIGN(tmp, 8)); + mst->lpt_lnum = cpu_to_le32(c->lpt_lnum); + mst->lpt_offs = cpu_to_le32(c->lpt_offs); + mst->nhead_lnum = cpu_to_le32(c->nhead_lnum); + mst->nhead_offs = cpu_to_le32(c->nhead_offs); + mst->ltab_lnum = cpu_to_le32(c->ltab_lnum); + mst->ltab_offs = cpu_to_le32(c->ltab_offs); + mst->lsave_lnum = cpu_to_le32(c->lsave_lnum); + mst->lsave_offs = cpu_to_le32(c->lsave_offs); + mst->lscan_lnum = cpu_to_le32(main_first); + mst->empty_lebs = cpu_to_le32(main_lebs - 2); + mst->idx_lebs = cpu_to_le32(1); + mst->leb_cnt = cpu_to_le32(c->leb_cnt); + + /* Calculate lprops statistics */ + tmp64 = main_bytes; + tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); + tmp64 -= ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); + mst->total_free = cpu_to_le64(tmp64); + + tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); + ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) - + UBIFS_INO_NODE_SZ; + tmp64 += ino_waste; + tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8); + mst->total_dirty = cpu_to_le64(tmp64); + + /* The indexing LEB does not contribute to dark space */ + tmp64 = ((long long)(c->main_lebs - 1) * c->dark_wm); + mst->total_dark = cpu_to_le64(tmp64); + + mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); + + err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0); + if (err) { + kfree(mst); + return err; + } + err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, + 0); + kfree(mst); + if (err) + return err; + + dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM); + + /* Create the root indexing node */ + tmp = ubifs_idx_node_sz(c, 1); + idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL); + if (!idx) + return -ENOMEM; + + c->key_fmt = UBIFS_SIMPLE_KEY_FMT; + c->key_hash = key_r5_hash; + + idx->ch.node_type = UBIFS_IDX_NODE; + idx->child_cnt = cpu_to_le16(1); + ino_key_init(c, &key, UBIFS_ROOT_INO); + br = ubifs_idx_branch(c, idx, 0); + key_write_idx(c, &key, &br->key); + br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB); + br->len = cpu_to_le32(UBIFS_INO_NODE_SZ); + err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0); + kfree(idx); + if (err) + return err; + + dbg_gen("default root indexing node created LEB %d:0", + main_first + DEFAULT_IDX_LEB); + + /* Create default root inode */ + tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); + ino = kzalloc(tmp, GFP_KERNEL); + if (!ino) + return -ENOMEM; + + ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO); + ino->ch.node_type = UBIFS_INO_NODE; + ino->creat_sqnum = cpu_to_le64(++c->max_sqnum); + ino->nlink = cpu_to_le32(2); + tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec); + ino->atime_sec = tmp_le64; + ino->ctime_sec = tmp_le64; + ino->mtime_sec = tmp_le64; + ino->atime_nsec = 0; + ino->ctime_nsec = 0; + ino->mtime_nsec = 0; + ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO); + ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ); + + /* Set compression enabled by default */ + ino->flags = cpu_to_le32(UBIFS_COMPR_FL); + + err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ, + main_first + DEFAULT_DATA_LEB, 0); + kfree(ino); + if (err) + return err; + + dbg_gen("root inode created at LEB %d:0", + main_first + DEFAULT_DATA_LEB); + + /* + * The first node in the log has to be the commit start node. This is + * always the case during normal file-system operation. Write a fake + * commit start node to the log. + */ + tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size); + cs = kzalloc(tmp, GFP_KERNEL); + if (!cs) + return -ENOMEM; + + cs->ch.node_type = UBIFS_CS_NODE; + err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, 0); + kfree(cs); + + ubifs_msg("default file-system created"); + return 0; +} +#endif + /** * validate_sb - validate superblock node. * @c: UBIFS file-system description object @@ -114,9 +391,8 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { - ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, " - "%d minimum required", c->leb_cnt, c->vi.size, - min_leb_cnt); + ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required", + c->leb_cnt, c->vi.size, min_leb_cnt); goto failed; } @@ -127,13 +403,22 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) } if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { - err = 7; + ubifs_err("too few main LEBs count %d, must be at least %d", + c->main_lebs, UBIFS_MIN_MAIN_LEBS); + goto failed; + } + + max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; + if (c->max_bud_bytes < max_bytes) { + ubifs_err("too small journal (%lld bytes), must be at least %lld bytes", + c->max_bud_bytes, max_bytes); goto failed; } - if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || - c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { - err = 8; + max_bytes = (long long)c->leb_size * c->main_lebs; + if (c->max_bud_bytes > max_bytes) { + ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area", + c->max_bud_bytes, max_bytes); goto failed; } @@ -167,7 +452,6 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) goto failed; } - max_bytes = c->main_lebs * (long long)c->leb_size; if (c->rp_size < 0 || max_bytes < c->rp_size) { err = 14; goto failed; @@ -183,7 +467,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) failed: ubifs_err("bad superblock, error %d", err); - dbg_dump_node(c, sup); + ubifs_dump_node(c, sup); return -EINVAL; } @@ -192,7 +476,8 @@ failed: * @c: UBIFS file-system description object * * This function returns a pointer to the superblock node or a negative error - * code. + * code. Note, the user of this function is responsible of kfree()'ing the + * returned superblock buffer. */ struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) { @@ -214,6 +499,21 @@ struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) } /** + * ubifs_write_sb_node - write superblock node. + * @c: UBIFS file-system description object + * @sup: superblock node read with 'ubifs_read_sb_node()' + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup) +{ + int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); + + ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1); + return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len); +} + +/** * ubifs_read_superblock - read superblock. * @c: UBIFS file-system description object * @@ -227,8 +527,14 @@ int ubifs_read_superblock(struct ubifs_info *c) struct ubifs_sb_node *sup; if (c->empty) { +#ifndef __UBOOT__ + err = create_default_filesystem(c); + if (err) + return err; +#else printf("No UBIFS filesystem found!\n"); return -1; +#endif } sup = ubifs_read_sb_node(c); @@ -243,16 +549,12 @@ int ubifs_read_superblock(struct ubifs_info *c) * due to the unavailability of time-travelling equipment. */ if (c->fmt_version > UBIFS_FORMAT_VERSION) { - struct super_block *sb = c->vfs_sb; - int mounting_ro = sb->s_flags & MS_RDONLY; - - ubifs_assert(!c->ro_media || mounting_ro); - if (!mounting_ro || + ubifs_assert(!c->ro_media || c->ro_mount); + if (!c->ro_mount || c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { - ubifs_err("on-flash format version is w%d/r%d, but " - "software only supports up to version " - "w%d/r%d", c->fmt_version, - c->ro_compat_version, UBIFS_FORMAT_VERSION, + ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", + c->fmt_version, c->ro_compat_version, + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { ubifs_msg("only R/O mounting is possible"); @@ -310,22 +612,41 @@ int ubifs_read_superblock(struct ubifs_info *c) c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; c->fanout = le32_to_cpu(sup->fanout); c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); - c->default_compr = le16_to_cpu(sup->default_compr); c->rp_size = le64_to_cpu(sup->rp_size); - c->rp_uid = le32_to_cpu(sup->rp_uid); - c->rp_gid = le32_to_cpu(sup->rp_gid); +#ifndef __UBOOT__ + c->rp_uid = make_kuid(&init_user_ns, le32_to_cpu(sup->rp_uid)); + c->rp_gid = make_kgid(&init_user_ns, le32_to_cpu(sup->rp_gid)); +#else + c->rp_uid.val = le32_to_cpu(sup->rp_uid); + c->rp_gid.val = le32_to_cpu(sup->rp_gid); +#endif sup_flags = le32_to_cpu(sup->flags); + if (!c->mount_opts.override_compr) + c->default_compr = le16_to_cpu(sup->default_compr); c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); memcpy(&c->uuid, &sup->uuid, 16); c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); + c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP); /* Automatically increase file system size to the maximum size */ c->old_leb_cnt = c->leb_cnt; if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); - dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", - c->old_leb_cnt, c->leb_cnt); + if (c->ro_mount) + dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", + c->old_leb_cnt, c->leb_cnt); +#ifndef __UBOOT__ + else { + dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs", + c->old_leb_cnt, c->leb_cnt); + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + err = ubifs_write_sb_node(c, sup); + if (err) + goto out; + c->old_leb_cnt = c->leb_cnt; + } +#endif } c->log_bytes = (long long)c->log_lebs * c->leb_size; @@ -337,10 +658,162 @@ int ubifs_read_superblock(struct ubifs_info *c) c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; c->main_first = c->leb_cnt - c->main_lebs; - c->report_rp_size = ubifs_reported_space(c, c->rp_size); err = validate_sb(c, sup); out: kfree(sup); return err; } + +/** + * fixup_leb - fixup/unmap an LEB containing free space. + * @c: UBIFS file-system description object + * @lnum: the LEB number to fix up + * @len: number of used bytes in LEB (starting at offset 0) + * + * This function reads the contents of the given LEB number @lnum, then fixes + * it up, so that empty min. I/O units in the end of LEB are actually erased on + * flash (rather than being just all-0xff real data). If the LEB is completely + * empty, it is simply unmapped. + */ +static int fixup_leb(struct ubifs_info *c, int lnum, int len) +{ + int err; + + ubifs_assert(len >= 0); + ubifs_assert(len % c->min_io_size == 0); + ubifs_assert(len < c->leb_size); + + if (len == 0) { + dbg_mnt("unmap empty LEB %d", lnum); + return ubifs_leb_unmap(c, lnum); + } + + dbg_mnt("fixup LEB %d, data len %d", lnum, len); + err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1); + if (err) + return err; + + return ubifs_leb_change(c, lnum, c->sbuf, len); +} + +/** + * fixup_free_space - find & remap all LEBs containing free space. + * @c: UBIFS file-system description object + * + * This function walks through all LEBs in the filesystem and fiexes up those + * containing free/empty space. + */ +static int fixup_free_space(struct ubifs_info *c) +{ + int lnum, err = 0; + struct ubifs_lprops *lprops; + + ubifs_get_lprops(c); + + /* Fixup LEBs in the master area */ + for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) { + err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz); + if (err) + goto out; + } + + /* Unmap unused log LEBs */ + lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + while (lnum != c->ltail_lnum) { + err = fixup_leb(c, lnum, 0); + if (err) + goto out; + lnum = ubifs_next_log_lnum(c, lnum); + } + + /* + * Fixup the log head which contains the only a CS node at the + * beginning. + */ + err = fixup_leb(c, c->lhead_lnum, + ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size)); + if (err) + goto out; + + /* Fixup LEBs in the LPT area */ + for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { + int free = c->ltab[lnum - c->lpt_first].free; + + if (free > 0) { + err = fixup_leb(c, lnum, c->leb_size - free); + if (err) + goto out; + } + } + + /* Unmap LEBs in the orphans area */ + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + err = fixup_leb(c, lnum, 0); + if (err) + goto out; + } + + /* Fixup LEBs in the main area */ + for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { + lprops = ubifs_lpt_lookup(c, lnum); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + + if (lprops->free > 0) { + err = fixup_leb(c, lnum, c->leb_size - lprops->free); + if (err) + goto out; + } + } + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_fixup_free_space - find & fix all LEBs with free space. + * @c: UBIFS file-system description object + * + * This function fixes up LEBs containing free space on first mount, if the + * appropriate flag was set when the FS was created. Each LEB with one or more + * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure + * the free space is actually erased. E.g., this is necessary for some NAND + * chips, since the free space may have been programmed like real "0xff" data + * (generating a non-0xff ECC), causing future writes to the not-really-erased + * NAND pages to behave badly. After the space is fixed up, the superblock flag + * is cleared, so that this is skipped for all future mounts. + */ +int ubifs_fixup_free_space(struct ubifs_info *c) +{ + int err; + struct ubifs_sb_node *sup; + + ubifs_assert(c->space_fixup); + ubifs_assert(!c->ro_mount); + + ubifs_msg("start fixing up free space"); + + err = fixup_free_space(c); + if (err) + return err; + + sup = ubifs_read_sb_node(c); + if (IS_ERR(sup)) + return PTR_ERR(sup); + + /* Free-space fixup is no longer required */ + c->space_fixup = 0; + sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP); + + err = ubifs_write_sb_node(c, sup); + kfree(sup); + if (err) + return err; + + ubifs_msg("free space fixup complete"); + return err; +} diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 0ed8247..5523d4e 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) @@ -27,6 +16,10 @@ * debugging functions. */ +#define __UBOOT__ +#ifdef __UBOOT__ +#include <linux/err.h> +#endif #include "ubifs.h" /** @@ -75,7 +68,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, magic = le32_to_cpu(ch->magic); if (magic == 0xFFFFFFFF) { - dbg_scan("hit empty space"); + dbg_scan("hit empty space at LEB %d:%d", lnum, offs); return SCANNED_EMPTY_SPACE; } @@ -85,7 +78,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, if (len < UBIFS_CH_SZ) return SCANNED_GARBAGE; - dbg_scan("scanning %s", dbg_ntype(ch->node_type)); + dbg_scan("scanning %s at LEB %d:%d", + dbg_ntype(ch->node_type), lnum, offs); if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) return SCANNED_A_CORRUPT_NODE; @@ -101,22 +95,21 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, if (!quiet) { ubifs_err("bad pad node at LEB %d:%d", lnum, offs); - dbg_dump_node(c, pad); + ubifs_dump_node(c, pad); } return SCANNED_A_BAD_PAD_NODE; } /* Make the node pads to 8-byte boundary */ if ((node_len + pad_len) & 7) { - if (!quiet) { - dbg_err("bad padding length %d - %d", - offs, offs + node_len + pad_len); - } + if (!quiet) + ubifs_err("bad padding length %d - %d", + offs, offs + node_len + pad_len); return SCANNED_A_BAD_PAD_NODE; } - dbg_scan("%d bytes padded, offset now %d", - pad_len, ALIGN(offs + node_len + pad_len, 8)); + dbg_scan("%d bytes padded at LEB %d:%d, offset now %d", pad_len, + lnum, offs, ALIGN(offs + node_len + pad_len, 8)); return node_len + pad_len; } @@ -149,10 +142,10 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, INIT_LIST_HEAD(&sleb->nodes); sleb->buf = sbuf; - err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); + err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); if (err && err != -EBADMSG) { - ubifs_err("cannot read %d bytes from LEB %d:%d," - " error %d", c->leb_size - offs, lnum, offs, err); + ubifs_err("cannot read %d bytes from LEB %d:%d, error %d", + c->leb_size - offs, lnum, offs, err); kfree(sleb); return ERR_PTR(err); } @@ -198,7 +191,7 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, struct ubifs_ino_node *ino = buf; struct ubifs_scan_node *snod; - snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); + snod = kmalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); if (!snod) return -ENOMEM; @@ -213,13 +206,15 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, case UBIFS_DENT_NODE: case UBIFS_XENT_NODE: case UBIFS_DATA_NODE: - case UBIFS_TRUN_NODE: /* * The key is in the same place in all keyed * nodes. */ key_read(c, &ino->key, &snod->key); break; + default: + invalid_key_init(c, &snod->key); + break; } list_add_tail(&snod->list, &sleb->nodes); sleb->nodes_cnt += 1; @@ -238,13 +233,11 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, { int len; - ubifs_err("corrupted data at LEB %d:%d", lnum, offs); - if (dbg_failure_mode) - return; + ubifs_err("corruption at LEB %d:%d", lnum, offs); len = c->leb_size - offs; - if (len > 4096) - len = 4096; - dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); + if (len > 8192) + len = 8192; + ubifs_err("first %d bytes from LEB %d:%d", len, lnum, offs); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); } @@ -253,13 +246,19 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, * @c: UBIFS file-system description object * @lnum: logical eraseblock number * @offs: offset to start at (usually zero) - * @sbuf: scan buffer (must be c->leb_size) + * @sbuf: scan buffer (must be of @c->leb_size bytes in size) + * @quiet: print no messages * * This function scans LEB number @lnum and returns complete information about - * its contents. Returns an error code in case of failure. + * its contents. Returns the scaned information in case of success and, + * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case + * of failure. + * + * If @quiet is non-zero, this function does not print large and scary + * error messages and flash dumps in case of errors. */ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, - int offs, void *sbuf) + int offs, void *sbuf, int quiet) { void *buf = sbuf + offs; int err, len = c->leb_size - offs; @@ -278,8 +277,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, cond_resched(); - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); - + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); if (ret > 0) { /* Padding bytes or a valid padding node */ offs += ret; @@ -294,17 +292,18 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, switch (ret) { case SCANNED_GARBAGE: - dbg_err("garbage"); + ubifs_err("garbage"); goto corrupted; case SCANNED_A_NODE: break; case SCANNED_A_CORRUPT_NODE: case SCANNED_A_BAD_PAD_NODE: - dbg_err("bad node"); + ubifs_err("bad node"); goto corrupted; default: - dbg_err("unknown"); - goto corrupted; + ubifs_err("unknown"); + err = -EINVAL; + goto error; } err = ubifs_add_snod(c, sleb, buf, offs); @@ -317,8 +316,12 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, len -= node_len; } - if (offs % c->min_io_size) + if (offs % c->min_io_size) { + if (!quiet) + ubifs_err("empty space starts at non-aligned offset %d", + offs); goto corrupted; + } ubifs_end_scan(c, sleb, lnum, offs); @@ -327,18 +330,25 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, break; for (; len; offs++, buf++, len--) if (*(uint8_t *)buf != 0xff) { - ubifs_err("corrupt empty space at LEB %d:%d", - lnum, offs); + if (!quiet) + ubifs_err("corrupt empty space at LEB %d:%d", + lnum, offs); goto corrupted; } return sleb; corrupted: - ubifs_scanned_corruption(c, lnum, offs, buf); + if (!quiet) { + ubifs_scanned_corruption(c, lnum, offs, buf); + ubifs_err("LEB %d scanning failed", lnum); + } err = -EUCLEAN; + ubifs_scan_destroy(sleb); + return ERR_PTR(err); + error: - ubifs_err("LEB %d scanning failed", lnum); + ubifs_err("LEB %d scanning failed, error %d", lnum, err); ubifs_scan_destroy(sleb); return ERR_PTR(err); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 748ab67..9c87db4 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -26,103 +15,45 @@ * corresponding subsystems, but most of it is here. */ -#include "ubifs.h" +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/kthread.h> +#include <linux/parser.h> +#include <linux/seq_file.h> +#include <linux/mount.h> #include <linux/math64.h> +#include <linux/writeback.h> +#else -#define INODE_LOCKED_MAX 64 +#include <linux/compat.h> +#include <linux/stat.h> +#include <linux/err.h> +#include "ubifs.h" +#include <ubi_uboot.h> +#include <mtd/ubi-user.h> -struct super_block *ubifs_sb; -static struct inode *inodes_locked_down[INODE_LOCKED_MAX]; +struct dentry; +struct file; +struct iattr; +struct kstat; +struct vfsmount; -/* shrinker.c */ +#define INODE_LOCKED_MAX 64 -/* List of all UBIFS file-system instances */ -struct list_head ubifs_infos; +struct super_block *ubifs_sb; +LIST_HEAD(super_blocks); -/* linux/fs/super.c */ +static struct inode *inodes_locked_down[INODE_LOCKED_MAX]; -static int sb_set(struct super_block *sb, void *data) +int set_anon_super(struct super_block *s, void *data) { - dev_t *dev = data; - - sb->s_dev = *dev; return 0; } -/** - * sget - find or create a superblock - * @type: filesystem type superblock should belong to - * @test: comparison callback - * @set: setup callback - * @data: argument to each of them - */ -struct super_block *sget(struct file_system_type *type, - int (*test)(struct super_block *,void *), - int (*set)(struct super_block *,void *), - void *data) -{ - struct super_block *s = NULL; - int err; - - s = kzalloc(sizeof(struct super_block), GFP_USER); - if (!s) { - err = -ENOMEM; - return ERR_PTR(err); - } - - INIT_LIST_HEAD(&s->s_instances); - INIT_LIST_HEAD(&s->s_inodes); - s->s_time_gran = 1000000000; - - err = set(s, data); - if (err) { - return ERR_PTR(err); - } - s->s_type = type; - strncpy(s->s_id, type->name, sizeof(s->s_id)); - list_add(&s->s_instances, &type->fs_supers); - return s; -} - -/** - * validate_inode - validate inode. - * @c: UBIFS file-system description object - * @inode: the inode to validate - * - * This is a helper function for 'ubifs_iget()' which validates various fields - * of a newly built inode to make sure they contain sane values and prevent - * possible vulnerabilities. Returns zero if the inode is all right and - * a non-zero error code if not. - */ -static int validate_inode(struct ubifs_info *c, const struct inode *inode) -{ - int err; - const struct ubifs_inode *ui = ubifs_inode(inode); - - if (inode->i_size > c->max_inode_sz) { - ubifs_err("inode is too large (%lld)", - (long long)inode->i_size); - return 1; - } - - if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { - ubifs_err("unknown compression type %d", ui->compr_type); - return 2; - } - - if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) - return 4; - - if (!ubifs_compr_present(ui->compr_type)) { - ubifs_warn("inode %lu uses '%s' compression, but it was not " - "compiled in", inode->i_ino, - ubifs_compr_name(ui->compr_type)); - } - - err = dbg_check_dir_size(c, inode); - return err; -} - struct inode *iget_locked(struct super_block *sb, unsigned long ino) { struct inode *inode; @@ -138,6 +69,10 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) return inode; } +void iget_failed(struct inode *inode) +{ +} + int ubifs_iput(struct inode *inode) { list_del_init(&inode->i_sb_list); @@ -179,6 +114,125 @@ void iput(struct inode *inode) inodes_locked_down[i] = ino; } +/* from fs/inode.c */ +/** + * clear_nlink - directly zero an inode's link count + * @inode: inode + * + * This is a low-level filesystem helper to replace any + * direct filesystem manipulation of i_nlink. See + * drop_nlink() for why we care about i_nlink hitting zero. + */ +void clear_nlink(struct inode *inode) +{ + if (inode->i_nlink) { + inode->__i_nlink = 0; + atomic_long_inc(&inode->i_sb->s_remove_count); + } +} +EXPORT_SYMBOL(clear_nlink); + +/** + * set_nlink - directly set an inode's link count + * @inode: inode + * @nlink: new nlink (should be non-zero) + * + * This is a low-level filesystem helper to replace any + * direct filesystem manipulation of i_nlink. + */ +void set_nlink(struct inode *inode, unsigned int nlink) +{ + if (!nlink) { + clear_nlink(inode); + } else { + /* Yes, some filesystems do change nlink from zero to one */ + if (inode->i_nlink == 0) + atomic_long_dec(&inode->i_sb->s_remove_count); + + inode->__i_nlink = nlink; + } +} +EXPORT_SYMBOL(set_nlink); + +/* from include/linux/fs.h */ +static inline void i_uid_write(struct inode *inode, uid_t uid) +{ + inode->i_uid.val = uid; +} + +static inline void i_gid_write(struct inode *inode, gid_t gid) +{ + inode->i_gid.val = gid; +} + +void unlock_new_inode(struct inode *inode) +{ + return; +} +#endif + +/* + * Maximum amount of memory we may 'kmalloc()' without worrying that we are + * allocating too much. + */ +#define UBIFS_KMALLOC_OK (128*1024) + +/* Slab cache for UBIFS inodes */ +struct kmem_cache *ubifs_inode_slab; + +#ifndef __UBOOT__ +/* UBIFS TNC shrinker description */ +static struct shrinker ubifs_shrinker_info = { + .scan_objects = ubifs_shrink_scan, + .count_objects = ubifs_shrink_count, + .seeks = DEFAULT_SEEKS, +}; +#endif + +/** + * validate_inode - validate inode. + * @c: UBIFS file-system description object + * @inode: the inode to validate + * + * This is a helper function for 'ubifs_iget()' which validates various fields + * of a newly built inode to make sure they contain sane values and prevent + * possible vulnerabilities. Returns zero if the inode is all right and + * a non-zero error code if not. + */ +static int validate_inode(struct ubifs_info *c, const struct inode *inode) +{ + int err; + const struct ubifs_inode *ui = ubifs_inode(inode); + + if (inode->i_size > c->max_inode_sz) { + ubifs_err("inode is too large (%lld)", + (long long)inode->i_size); + return 1; + } + + if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { + ubifs_err("unknown compression type %d", ui->compr_type); + return 2; + } + + if (ui->xattr_names + ui->xattr_cnt > XATTR_LIST_MAX) + return 3; + + if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) + return 4; + + if (ui->xattr && !S_ISREG(inode->i_mode)) + return 5; + + if (!ubifs_compr_present(ui->compr_type)) { + ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in", + inode->i_ino, ubifs_compr_name(ui->compr_type)); + } + + err = dbg_check_dir(c, inode); + return err; +} + struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) { int err; @@ -187,10 +241,13 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) struct ubifs_info *c = sb->s_fs_info; struct inode *inode; struct ubifs_inode *ui; +#ifdef __UBOOT__ int i; +#endif dbg_gen("inode %lu", inum); +#ifdef __UBOOT__ /* * U-Boot special handling of locked down inodes via recovery * e.g. ubifs_recover_size() @@ -211,6 +268,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) return inodes_locked_down[i]; } } +#endif inode = iget_locked(sb, inum); if (!inode) @@ -232,9 +290,9 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) goto out_ino; inode->i_flags |= (S_NOCMTIME | S_NOATIME); - inode->i_nlink = le32_to_cpu(ino->nlink); - inode->i_uid = le32_to_cpu(ino->uid); - inode->i_gid = le32_to_cpu(ino->gid); + set_nlink(inode, le32_to_cpu(ino->nlink)); + i_uid_write(inode, le32_to_cpu(ino->uid)); + i_gid_write(inode, le32_to_cpu(ino->gid)); inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); @@ -248,12 +306,101 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) ui->flags = le32_to_cpu(ino->flags); ui->compr_type = le16_to_cpu(ino->compr_type); ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum); + ui->xattr_cnt = le32_to_cpu(ino->xattr_cnt); + ui->xattr_size = le32_to_cpu(ino->xattr_size); + ui->xattr_names = le32_to_cpu(ino->xattr_names); ui->synced_i_size = ui->ui_size = inode->i_size; + ui->xattr = (ui->flags & UBIFS_XATTR_FL) ? 1 : 0; + err = validate_inode(c, inode); if (err) goto out_invalid; +#ifndef __UBOOT__ + /* Disable read-ahead */ + inode->i_mapping->backing_dev_info = &c->bdi; + + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_mapping->a_ops = &ubifs_file_address_operations; + inode->i_op = &ubifs_file_inode_operations; + inode->i_fop = &ubifs_file_operations; + if (ui->xattr) { + ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_ino; + } + memcpy(ui->data, ino->data, ui->data_len); + ((char *)ui->data)[ui->data_len] = '\0'; + } else if (ui->data_len != 0) { + err = 10; + goto out_invalid; + } + break; + case S_IFDIR: + inode->i_op = &ubifs_dir_inode_operations; + inode->i_fop = &ubifs_dir_operations; + if (ui->data_len != 0) { + err = 11; + goto out_invalid; + } + break; + case S_IFLNK: + inode->i_op = &ubifs_symlink_inode_operations; + if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) { + err = 12; + goto out_invalid; + } + ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_ino; + } + memcpy(ui->data, ino->data, ui->data_len); + ((char *)ui->data)[ui->data_len] = '\0'; + break; + case S_IFBLK: + case S_IFCHR: + { + dev_t rdev; + union ubifs_dev_desc *dev; + + ui->data = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_ino; + } + + dev = (union ubifs_dev_desc *)ino->data; + if (ui->data_len == sizeof(dev->new)) + rdev = new_decode_dev(le32_to_cpu(dev->new)); + else if (ui->data_len == sizeof(dev->huge)) + rdev = huge_decode_dev(le64_to_cpu(dev->huge)); + else { + err = 13; + goto out_invalid; + } + memcpy(ui->data, ino->data, ui->data_len); + inode->i_op = &ubifs_file_inode_operations; + init_special_inode(inode, inode->i_mode, rdev); + break; + } + case S_IFSOCK: + case S_IFIFO: + inode->i_op = &ubifs_file_inode_operations; + init_special_inode(inode, inode->i_mode, 0); + if (ui->data_len != 0) { + err = 14; + goto out_invalid; + } + break; + default: + err = 15; + goto out_invalid; + } +#else if ((inode->i_mode & S_IFMT) == S_IFLNK) { if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) { err = 12; @@ -267,23 +414,258 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) memcpy(ui->data, ino->data, ui->data_len); ((char *)ui->data)[ui->data_len] = '\0'; } +#endif kfree(ino); - inode->i_state &= ~(I_LOCK | I_NEW); +#ifndef __UBOOT__ + ubifs_set_inode_flags(inode); +#endif + unlock_new_inode(inode); return inode; out_invalid: ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); - dbg_dump_node(c, ino); - dbg_dump_inode(c, inode); + ubifs_dump_node(c, ino); + ubifs_dump_inode(c, inode); err = -EINVAL; out_ino: kfree(ino); out: ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err); + iget_failed(inode); return ERR_PTR(err); } +static struct inode *ubifs_alloc_inode(struct super_block *sb) +{ + struct ubifs_inode *ui; + + ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS); + if (!ui) + return NULL; + + memset((void *)ui + sizeof(struct inode), 0, + sizeof(struct ubifs_inode) - sizeof(struct inode)); + mutex_init(&ui->ui_mutex); + spin_lock_init(&ui->ui_lock); + return &ui->vfs_inode; +}; + +#ifndef __UBOOT__ +static void ubifs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ubifs_inode *ui = ubifs_inode(inode); + kmem_cache_free(ubifs_inode_slab, ui); +} + +static void ubifs_destroy_inode(struct inode *inode) +{ + struct ubifs_inode *ui = ubifs_inode(inode); + + kfree(ui->data); + call_rcu(&inode->i_rcu, ubifs_i_callback); +} + +/* + * Note, Linux write-back code calls this without 'i_mutex'. + */ +static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + int err = 0; + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_inode *ui = ubifs_inode(inode); + + ubifs_assert(!ui->xattr); + if (is_bad_inode(inode)) + return 0; + + mutex_lock(&ui->ui_mutex); + /* + * Due to races between write-back forced by budgeting + * (see 'sync_some_inodes()') and background write-back, the inode may + * have already been synchronized, do not do this again. This might + * also happen if it was synchronized in an VFS operation, e.g. + * 'ubifs_link()'. + */ + if (!ui->dirty) { + mutex_unlock(&ui->ui_mutex); + return 0; + } + + /* + * As an optimization, do not write orphan inodes to the media just + * because this is not needed. + */ + dbg_gen("inode %lu, mode %#x, nlink %u", + inode->i_ino, (int)inode->i_mode, inode->i_nlink); + if (inode->i_nlink) { + err = ubifs_jnl_write_inode(c, inode); + if (err) + ubifs_err("can't write inode %lu, error %d", + inode->i_ino, err); + else + err = dbg_check_inode_size(c, inode, ui->ui_size); + } + + ui->dirty = 0; + mutex_unlock(&ui->ui_mutex); + ubifs_release_dirty_inode_budget(c, ui); + return err; +} + +static void ubifs_evict_inode(struct inode *inode) +{ + int err; + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_inode *ui = ubifs_inode(inode); + + if (ui->xattr) + /* + * Extended attribute inode deletions are fully handled in + * 'ubifs_removexattr()'. These inodes are special and have + * limited usage, so there is nothing to do here. + */ + goto out; + + dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); + ubifs_assert(!atomic_read(&inode->i_count)); + + truncate_inode_pages(&inode->i_data, 0); + + if (inode->i_nlink) + goto done; + + if (is_bad_inode(inode)) + goto out; + + ui->ui_size = inode->i_size = 0; + err = ubifs_jnl_delete_inode(c, inode); + if (err) + /* + * Worst case we have a lost orphan inode wasting space, so a + * simple error message is OK here. + */ + ubifs_err("can't delete inode %lu, error %d", + inode->i_ino, err); + +out: + if (ui->dirty) + ubifs_release_dirty_inode_budget(c, ui); + else { + /* We've deleted something - clean the "no space" flags */ + c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } +done: + clear_inode(inode); +} +#endif + +static void ubifs_dirty_inode(struct inode *inode, int flags) +{ + struct ubifs_inode *ui = ubifs_inode(inode); + + ubifs_assert(mutex_is_locked(&ui->ui_mutex)); + if (!ui->dirty) { + ui->dirty = 1; + dbg_gen("inode %lu", inode->i_ino); + } +} + +#ifndef __UBOOT__ +static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct ubifs_info *c = dentry->d_sb->s_fs_info; + unsigned long long free; + __le32 *uuid = (__le32 *)c->uuid; + + free = ubifs_get_free_space(c); + dbg_gen("free space %lld bytes (%lld blocks)", + free, free >> UBIFS_BLOCK_SHIFT); + + buf->f_type = UBIFS_SUPER_MAGIC; + buf->f_bsize = UBIFS_BLOCK_SIZE; + buf->f_blocks = c->block_cnt; + buf->f_bfree = free >> UBIFS_BLOCK_SHIFT; + if (free > c->report_rp_size) + buf->f_bavail = (free - c->report_rp_size) >> UBIFS_BLOCK_SHIFT; + else + buf->f_bavail = 0; + buf->f_files = 0; + buf->f_ffree = 0; + buf->f_namelen = UBIFS_MAX_NLEN; + buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); + buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); + ubifs_assert(buf->f_bfree <= c->block_cnt); + return 0; +} + +static int ubifs_show_options(struct seq_file *s, struct dentry *root) +{ + struct ubifs_info *c = root->d_sb->s_fs_info; + + if (c->mount_opts.unmount_mode == 2) + seq_printf(s, ",fast_unmount"); + else if (c->mount_opts.unmount_mode == 1) + seq_printf(s, ",norm_unmount"); + + if (c->mount_opts.bulk_read == 2) + seq_printf(s, ",bulk_read"); + else if (c->mount_opts.bulk_read == 1) + seq_printf(s, ",no_bulk_read"); + + if (c->mount_opts.chk_data_crc == 2) + seq_printf(s, ",chk_data_crc"); + else if (c->mount_opts.chk_data_crc == 1) + seq_printf(s, ",no_chk_data_crc"); + + if (c->mount_opts.override_compr) { + seq_printf(s, ",compr=%s", + ubifs_compr_name(c->mount_opts.compr_type)); + } + + return 0; +} + +static int ubifs_sync_fs(struct super_block *sb, int wait) +{ + int i, err; + struct ubifs_info *c = sb->s_fs_info; + + /* + * Zero @wait is just an advisory thing to help the file system shove + * lots of data into the queues, and there will be the second + * '->sync_fs()' call, with non-zero @wait. + */ + if (!wait) + return 0; + + /* + * Synchronize write buffers, because 'ubifs_run_commit()' does not + * do this if it waits for an already running commit. + */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + return err; + } + + /* + * Strictly speaking, it is not necessary to commit the journal here, + * synchronizing write-buffers would be enough. But committing makes + * UBIFS free space predictions much more accurate, so we want to let + * the user be able to get more accurate results of 'statfs()' after + * they synchronize the file system. + */ + err = ubifs_run_commit(c); + if (err) + return err; + + return ubi_sync(c->vi.ubi_num); +} +#endif + /** * init_constants_early - initialize UBIFS constants. * @c: UBIFS file-system description object @@ -312,9 +694,12 @@ static int init_constants_early(struct ubifs_info *c) c->leb_cnt = c->vi.size; c->leb_size = c->vi.usable_leb_size; + c->leb_start = c->di.leb_start; c->half_leb_size = c->leb_size / 2; c->min_io_size = c->di.min_io_size; c->min_io_shift = fls(c->min_io_size) - 1; + c->max_write_size = c->di.max_write_size; + c->max_write_shift = fls(c->max_write_size) - 1; if (c->leb_size < UBIFS_MIN_LEB_SZ) { ubifs_err("too small LEBs (%d bytes), min. is %d bytes", @@ -334,6 +719,18 @@ static int init_constants_early(struct ubifs_info *c) } /* + * Maximum write size has to be greater or equivalent to min. I/O + * size, and be multiple of min. I/O size. + */ + if (c->max_write_size < c->min_io_size || + c->max_write_size % c->min_io_size || + !is_power_of_2(c->max_write_size)) { + ubifs_err("bad write buffer size %d for %d min. I/O unit", + c->max_write_size, c->min_io_size); + return -EINVAL; + } + + /* * UBIFS aligns all node to 8-byte boundary, so to make function in * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is * less than 8. @@ -341,6 +738,10 @@ static int init_constants_early(struct ubifs_info *c) if (c->min_io_size < 8) { c->min_io_size = 8; c->min_io_shift = 3; + if (c->max_write_size < c->min_io_size) { + c->max_write_size = c->min_io_size; + c->max_write_shift = c->min_io_shift; + } } c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); @@ -393,9 +794,33 @@ static int init_constants_early(struct ubifs_info *c) */ c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; + /* Buffer size for bulk-reads */ + c->max_bu_buf_len = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ; + if (c->max_bu_buf_len > c->leb_size) + c->max_bu_buf_len = c->leb_size; return 0; } +/** + * bud_wbuf_callback - bud LEB write-buffer synchronization call-back. + * @c: UBIFS file-system description object + * @lnum: LEB the write-buffer was synchronized to + * @free: how many free bytes left in this LEB + * @pad: how many bytes were padded + * + * This is a callback function which is called by the I/O unit when the + * write-buffer is synchronized. We need this to correctly maintain space + * accounting in bud logical eraseblocks. This function returns zero in case of + * success and a negative error code in case of failure. + * + * This function actually belongs to the journal, but we keep it here because + * we want to keep it static. + */ +static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad) +{ + return ubifs_update_one_lp(c, lnum, free, pad, 0, 0); +} + /* * init_constants_sb - initialize UBIFS constants. * @c: UBIFS file-system description object @@ -426,8 +851,8 @@ static int init_constants_sb(struct ubifs_info *c) tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt; tmp = ALIGN(tmp, c->min_io_size); if (tmp > c->leb_size) { - dbg_err("too small LEB size %d, at least %d needed", - c->leb_size, tmp); + ubifs_err("too small LEB size %d, at least %d needed", + c->leb_size, tmp); return -EINVAL; } @@ -441,8 +866,8 @@ static int init_constants_sb(struct ubifs_info *c) tmp /= c->leb_size; tmp += 1; if (c->log_lebs < tmp) { - dbg_err("too small log %d LEBs, required min. %d LEBs", - c->log_lebs, tmp); + ubifs_err("too small log %d LEBs, required min. %d LEBs", + c->log_lebs, tmp); return -EINVAL; } @@ -451,11 +876,11 @@ static int init_constants_sb(struct ubifs_info *c) * be compressed and direntries are of the maximum size. * * Note, data, which may be stored in inodes is budgeted separately, so - * it is not included into 'c->inode_budget'. + * it is not included into 'c->bi.inode_budget'. */ - c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; - c->inode_budget = UBIFS_INO_NODE_SZ; - c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; + c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; + c->bi.inode_budget = UBIFS_INO_NODE_SZ; + c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ; /* * When the amount of flash space used by buds becomes @@ -482,6 +907,8 @@ static int init_constants_sb(struct ubifs_info *c) if (err) return err; + /* Initialize effective LEB size used in budgeting calculations */ + c->idx_leb_size = c->leb_size - c->max_idx_node_sz; return 0; } @@ -497,7 +924,8 @@ static void init_constants_master(struct ubifs_info *c) { long long tmp64; - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->report_rp_size = ubifs_reported_space(c, c->rp_size); /* * Calculate total amount of FS blocks. This number is not used @@ -515,6 +943,88 @@ static void init_constants_master(struct ubifs_info *c) } /** + * take_gc_lnum - reserve GC LEB. + * @c: UBIFS file-system description object + * + * This function ensures that the LEB reserved for garbage collection is marked + * as "taken" in lprops. We also have to set free space to LEB size and dirty + * space to zero, because lprops may contain out-of-date information if the + * file-system was un-mounted before it has been committed. This function + * returns zero in case of success and a negative error code in case of + * failure. + */ +static int take_gc_lnum(struct ubifs_info *c) +{ + int err; + + if (c->gc_lnum == -1) { + ubifs_err("no LEB for GC"); + return -EINVAL; + } + + /* And we have to tell lprops that this LEB is taken */ + err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0, + LPROPS_TAKEN, 0, 0); + return err; +} + +/** + * alloc_wbufs - allocate write-buffers. + * @c: UBIFS file-system description object + * + * This helper function allocates and initializes UBIFS write-buffers. Returns + * zero in case of success and %-ENOMEM in case of failure. + */ +static int alloc_wbufs(struct ubifs_info *c) +{ + int i, err; + + c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead), + GFP_KERNEL); + if (!c->jheads) + return -ENOMEM; + + /* Initialize journal heads */ + for (i = 0; i < c->jhead_cnt; i++) { + INIT_LIST_HEAD(&c->jheads[i].buds_list); + err = ubifs_wbuf_init(c, &c->jheads[i].wbuf); + if (err) + return err; + + c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; + c->jheads[i].wbuf.jhead = i; + c->jheads[i].grouped = 1; + } + + /* + * Garbage Collector head does not need to be synchronized by timer. + * Also GC head nodes are not grouped. + */ + c->jheads[GCHD].wbuf.no_timer = 1; + c->jheads[GCHD].grouped = 0; + + return 0; +} + +/** + * free_wbufs - free write-buffers. + * @c: UBIFS file-system description object + */ +static void free_wbufs(struct ubifs_info *c) +{ + int i; + + if (c->jheads) { + for (i = 0; i < c->jhead_cnt; i++) { + kfree(c->jheads[i].wbuf.buf); + kfree(c->jheads[i].wbuf.inodes); + } + kfree(c->jheads); + c->jheads = NULL; + } +} + +/** * free_orphans - free orphans. * @c: UBIFS file-system description object */ @@ -533,13 +1043,27 @@ static void free_orphans(struct ubifs_info *c) orph = list_entry(c->orph_list.next, struct ubifs_orphan, list); list_del(&orph->list); kfree(orph); - dbg_err("orphan list not empty at unmount"); + ubifs_err("orphan list not empty at unmount"); } vfree(c->orph_buf); c->orph_buf = NULL; } +#ifndef __UBOOT__ +/** + * free_buds - free per-bud objects. + * @c: UBIFS file-system description object + */ +static void free_buds(struct ubifs_info *c) +{ + struct ubifs_bud *bud, *n; + + rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb) + kfree(bud); +} +#endif + /** * check_volume_empty - check if the UBI volume is empty. * @c: UBIFS file-system description object @@ -555,7 +1079,7 @@ static int check_volume_empty(struct ubifs_info *c) c->empty = 1; for (lnum = 0; lnum < c->leb_cnt; lnum++) { - err = ubi_is_mapped(c->ubi, lnum); + err = ubifs_is_mapped(c, lnum); if (unlikely(err < 0)) return err; if (err == 1) { @@ -569,23 +1093,258 @@ static int check_volume_empty(struct ubifs_info *c) return 0; } +/* + * UBIFS mount options. + * + * Opt_fast_unmount: do not run a journal commit before un-mounting + * Opt_norm_unmount: run a journal commit before un-mounting + * Opt_bulk_read: enable bulk-reads + * Opt_no_bulk_read: disable bulk-reads + * Opt_chk_data_crc: check CRCs when reading data nodes + * Opt_no_chk_data_crc: do not check CRCs when reading data nodes + * Opt_override_compr: override default compressor + * Opt_err: just end of array marker + */ +enum { + Opt_fast_unmount, + Opt_norm_unmount, + Opt_bulk_read, + Opt_no_bulk_read, + Opt_chk_data_crc, + Opt_no_chk_data_crc, + Opt_override_compr, + Opt_err, +}; + +#ifndef __UBOOT__ +static const match_table_t tokens = { + {Opt_fast_unmount, "fast_unmount"}, + {Opt_norm_unmount, "norm_unmount"}, + {Opt_bulk_read, "bulk_read"}, + {Opt_no_bulk_read, "no_bulk_read"}, + {Opt_chk_data_crc, "chk_data_crc"}, + {Opt_no_chk_data_crc, "no_chk_data_crc"}, + {Opt_override_compr, "compr=%s"}, + {Opt_err, NULL}, +}; + +/** + * parse_standard_option - parse a standard mount option. + * @option: the option to parse + * + * Normally, standard mount options like "sync" are passed to file-systems as + * flags. However, when a "rootflags=" kernel boot parameter is used, they may + * be present in the options string. This function tries to deal with this + * situation and parse standard options. Returns 0 if the option was not + * recognized, and the corresponding integer flag if it was. + * + * UBIFS is only interested in the "sync" option, so do not check for anything + * else. + */ +static int parse_standard_option(const char *option) +{ + ubifs_msg("parse %s", option); + if (!strcmp(option, "sync")) + return MS_SYNCHRONOUS; + return 0; +} + +/** + * ubifs_parse_options - parse mount parameters. + * @c: UBIFS file-system description object + * @options: parameters to parse + * @is_remount: non-zero if this is FS re-mount + * + * This function parses UBIFS mount options and returns zero in case success + * and a negative error code in case of failure. + */ +static int ubifs_parse_options(struct ubifs_info *c, char *options, + int is_remount) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + + if (!options) + return 0; + + while ((p = strsep(&options, ","))) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + /* + * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. + * We accept them in order to be backward-compatible. But this + * should be removed at some point. + */ + case Opt_fast_unmount: + c->mount_opts.unmount_mode = 2; + break; + case Opt_norm_unmount: + c->mount_opts.unmount_mode = 1; + break; + case Opt_bulk_read: + c->mount_opts.bulk_read = 2; + c->bulk_read = 1; + break; + case Opt_no_bulk_read: + c->mount_opts.bulk_read = 1; + c->bulk_read = 0; + break; + case Opt_chk_data_crc: + c->mount_opts.chk_data_crc = 2; + c->no_chk_data_crc = 0; + break; + case Opt_no_chk_data_crc: + c->mount_opts.chk_data_crc = 1; + c->no_chk_data_crc = 1; + break; + case Opt_override_compr: + { + char *name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (!strcmp(name, "none")) + c->mount_opts.compr_type = UBIFS_COMPR_NONE; + else if (!strcmp(name, "lzo")) + c->mount_opts.compr_type = UBIFS_COMPR_LZO; + else if (!strcmp(name, "zlib")) + c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; + else { + ubifs_err("unknown compressor \"%s\"", name); + kfree(name); + return -EINVAL; + } + kfree(name); + c->mount_opts.override_compr = 1; + c->default_compr = c->mount_opts.compr_type; + break; + } + default: + { + unsigned long flag; + struct super_block *sb = c->vfs_sb; + + flag = parse_standard_option(p); + if (!flag) { + ubifs_err("unrecognized mount option \"%s\" or missing value", + p); + return -EINVAL; + } + sb->s_flags |= flag; + break; + } + } + } + + return 0; +} + +/** + * destroy_journal - destroy journal data structures. + * @c: UBIFS file-system description object + * + * This function destroys journal data structures including those that may have + * been created by recovery functions. + */ +static void destroy_journal(struct ubifs_info *c) +{ + while (!list_empty(&c->unclean_leb_list)) { + struct ubifs_unclean_leb *ucleb; + + ucleb = list_entry(c->unclean_leb_list.next, + struct ubifs_unclean_leb, list); + list_del(&ucleb->list); + kfree(ucleb); + } + while (!list_empty(&c->old_buds)) { + struct ubifs_bud *bud; + + bud = list_entry(c->old_buds.next, struct ubifs_bud, list); + list_del(&bud->list); + kfree(bud); + } + ubifs_destroy_idx_gc(c); + ubifs_destroy_size_tree(c); + ubifs_tnc_close(c); + free_buds(c); +} +#endif + +/** + * bu_init - initialize bulk-read information. + * @c: UBIFS file-system description object + */ +static void bu_init(struct ubifs_info *c) +{ + ubifs_assert(c->bulk_read == 1); + + if (c->bu.buf) + return; /* Already initialized */ + +again: + c->bu.buf = kmalloc(c->max_bu_buf_len, GFP_KERNEL | __GFP_NOWARN); + if (!c->bu.buf) { + if (c->max_bu_buf_len > UBIFS_KMALLOC_OK) { + c->max_bu_buf_len = UBIFS_KMALLOC_OK; + goto again; + } + + /* Just disable bulk-read */ + ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it", + c->max_bu_buf_len); + c->mount_opts.bulk_read = 1; + c->bulk_read = 0; + return; + } +} + +#ifndef __UBOOT__ +/** + * check_free_space - check if there is enough free space to mount. + * @c: UBIFS file-system description object + * + * This function makes sure UBIFS has enough free space to be mounted in + * read/write mode. UBIFS must always have some free space to allow deletions. + */ +static int check_free_space(struct ubifs_info *c) +{ + ubifs_assert(c->dark_wm > 0); + if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { + ubifs_err("insufficient free space to mount in R/W mode"); + ubifs_dump_budg(c, &c->bi); + ubifs_dump_lprops(c); + return -ENOSPC; + } + return 0; +} +#endif + /** * mount_ubifs - mount UBIFS file-system. * @c: UBIFS file-system description object * * This function mounts UBIFS file system. Returns zero in case of success and * a negative error code in case of failure. - * - * Note, the function does not de-allocate resources it it fails half way - * through, and the caller has to do this instead. */ static int mount_ubifs(struct ubifs_info *c) { - struct super_block *sb = c->vfs_sb; - int err, mounted_read_only = (sb->s_flags & MS_RDONLY); - long long x; + int err; + long long x, y; size_t sz; + c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); +#ifdef __UBOOT__ + if (!c->ro_mount) { + printf("UBIFS: only ro mode in U-Boot allowed.\n"); + return -EACCES; + } +#endif + err = init_constants_early(c); if (err) return err; @@ -598,7 +1357,7 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_free; - if (c->empty && (mounted_read_only || c->ro_media)) { + if (c->empty && (c->ro_mount || c->ro_media)) { /* * This UBI volume is empty, and read-only, or the file system * is mounted read-only - we cannot format it. @@ -609,7 +1368,7 @@ static int mount_ubifs(struct ubifs_info *c) goto out_free; } - if (c->ro_media && !mounted_read_only) { + if (c->ro_media && !c->ro_mount) { ubifs_err("cannot mount read-write - read-only media"); err = -EROFS; goto out_free; @@ -629,11 +1388,27 @@ static int mount_ubifs(struct ubifs_info *c) if (!c->sbuf) goto out_free; - /* - * We have to check all CRCs, even for data nodes, when we mount the FS - * (specifically, when we are replaying). - */ - c->always_chk_crc = 1; +#ifndef __UBOOT__ + if (!c->ro_mount) { + c->ileb_buf = vmalloc(c->leb_size); + if (!c->ileb_buf) + goto out_free; + } +#endif + + if (c->bulk_read == 1) + bu_init(c); + +#ifndef __UBOOT__ + if (!c->ro_mount) { + c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, + GFP_KERNEL); + if (!c->write_reserve_buf) + goto out_free; + } +#endif + + c->mounting = 1; err = ubifs_read_superblock(c); if (err) @@ -646,11 +1421,10 @@ static int mount_ubifs(struct ubifs_info *c) if (!ubifs_compr_present(c->default_compr)) { ubifs_err("'compressor \"%s\" is not compiled in", ubifs_compr_name(c->default_compr)); + err = -ENOTSUPP; goto out_free; } - dbg_failure_mode_registration(c); - err = init_constants_sb(c); if (err) goto out_free; @@ -663,7 +1437,25 @@ static int mount_ubifs(struct ubifs_info *c) goto out_free; } + err = alloc_wbufs(c); + if (err) + goto out_cbuf; + sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); +#ifndef __UBOOT__ + if (!c->ro_mount) { + /* Create background thread */ + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); + if (IS_ERR(c->bgt)) { + err = PTR_ERR(c->bgt); + c->bgt = NULL; + ubifs_err("cannot spawn \"%s\", error %d", + c->bgt_name, err); + goto out_wbufs; + } + wake_up_process(c->bgt); + } +#endif err = ubifs_read_master(c); if (err) @@ -676,118 +1468,208 @@ static int mount_ubifs(struct ubifs_info *c) c->need_recovery = 1; } - err = ubifs_lpt_init(c, 1, !mounted_read_only); +#ifndef __UBOOT__ + if (c->need_recovery && !c->ro_mount) { + err = ubifs_recover_inl_heads(c, c->sbuf); + if (err) + goto out_master; + } +#endif + + err = ubifs_lpt_init(c, 1, !c->ro_mount); if (err) - goto out_lpt; + goto out_master; + +#ifndef __UBOOT__ + if (!c->ro_mount && c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + goto out_lpt; + } + + if (!c->ro_mount) { + /* + * Set the "dirty" flag so that if we reboot uncleanly we + * will notice this immediately on the next mount. + */ + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = ubifs_write_master(c); + if (err) + goto out_lpt; + } +#endif - err = dbg_check_idx_size(c, c->old_idx_sz); + err = dbg_check_idx_size(c, c->bi.old_idx_sz); if (err) goto out_lpt; +#ifndef __UBOOT__ err = ubifs_replay_journal(c); if (err) goto out_journal; +#endif + + /* Calculate 'min_idx_lebs' after journal replay */ + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); - err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); + err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); if (err) goto out_orphans; - if (c->need_recovery) { + if (!c->ro_mount) { +#ifndef __UBOOT__ + int lnum; + + err = check_free_space(c); + if (err) + goto out_orphans; + + /* Check for enough log space */ + lnum = c->lhead_lnum + 1; + if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) + lnum = UBIFS_LOG_LNUM; + if (lnum == c->ltail_lnum) { + err = ubifs_consolidate_log(c); + if (err) + goto out_orphans; + } + + if (c->need_recovery) { + err = ubifs_recover_size(c); + if (err) + goto out_orphans; + err = ubifs_rcvry_gc_commit(c); + if (err) + goto out_orphans; + } else { + err = take_gc_lnum(c); + if (err) + goto out_orphans; + + /* + * GC LEB may contain garbage if there was an unclean + * reboot, and it should be un-mapped. + */ + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + goto out_orphans; + } + + err = dbg_check_lprops(c); + if (err) + goto out_orphans; +#endif + } else if (c->need_recovery) { err = ubifs_recover_size(c); if (err) goto out_orphans; + } else { + /* + * Even if we mount read-only, we have to set space in GC LEB + * to proper value because this affects UBIFS free space + * reporting. We do not want to have a situation when + * re-mounting from R/O to R/W changes amount of free space. + */ + err = take_gc_lnum(c); + if (err) + goto out_orphans; } +#ifndef __UBOOT__ spin_lock(&ubifs_infos_lock); list_add_tail(&c->infos_list, &ubifs_infos); spin_unlock(&ubifs_infos_lock); +#endif if (c->need_recovery) { - if (mounted_read_only) + if (c->ro_mount) ubifs_msg("recovery deferred"); else { c->need_recovery = 0; ubifs_msg("recovery completed"); + /* + * GC LEB has to be empty and taken at this point. But + * the journal head LEBs may also be accounted as + * "empty taken" if they are empty. + */ + ubifs_assert(c->lst.taken_empty_lebs > 0); } - } + } else + ubifs_assert(c->lst.taken_empty_lebs > 0); err = dbg_check_filesystem(c); if (err) goto out_infos; - c->always_chk_crc = 0; + err = dbg_debugfs_init_fs(c); + if (err) + goto out_infos; + + c->mounting = 0; - ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", - c->vi.ubi_num, c->vi.vol_id, c->vi.name); - if (mounted_read_only) - ubifs_msg("mounted read-only"); + ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", + c->vi.ubi_num, c->vi.vol_id, c->vi.name, + c->ro_mount ? ", R/O mode" : ""); x = (long long)c->main_lebs * c->leb_size; - ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " - "LEBs)", x, x >> 10, x >> 20, c->main_lebs); - x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; - ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " - "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); - ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", + y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; + ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", + c->leb_size, c->leb_size >> 10, c->min_io_size, + c->max_write_size); + ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)", + x, x >> 20, c->main_lebs, + y, y >> 20, c->log_lebs + c->max_bud_cnt); + ubifs_msg("reserved for root: %llu bytes (%llu KiB)", + c->report_rp_size, c->report_rp_size >> 10); + ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s", c->fmt_version, c->ro_compat_version, - UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); - ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); - ubifs_msg("reserved for root: %llu bytes (%llu KiB)", - c->report_rp_size, c->report_rp_size >> 10); - - dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); - dbg_msg("LEB size: %d bytes (%d KiB)", - c->leb_size, c->leb_size >> 10); - dbg_msg("data journal heads: %d", + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid, + c->big_lpt ? ", big LPT model" : ", small LPT model"); + + dbg_gen("default compressor: %s", ubifs_compr_name(c->default_compr)); + dbg_gen("data journal heads: %d", c->jhead_cnt - NONDATA_JHEADS_CNT); - dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" - "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", - c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3], - c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], - c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], - c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); - dbg_msg("big_lpt %d", c->big_lpt); - dbg_msg("log LEBs: %d (%d - %d)", + dbg_gen("log LEBs: %d (%d - %d)", c->log_lebs, UBIFS_LOG_LNUM, c->log_last); - dbg_msg("LPT area LEBs: %d (%d - %d)", + dbg_gen("LPT area LEBs: %d (%d - %d)", c->lpt_lebs, c->lpt_first, c->lpt_last); - dbg_msg("orphan area LEBs: %d (%d - %d)", + dbg_gen("orphan area LEBs: %d (%d - %d)", c->orph_lebs, c->orph_first, c->orph_last); - dbg_msg("main area LEBs: %d (%d - %d)", + dbg_gen("main area LEBs: %d (%d - %d)", c->main_lebs, c->main_first, c->leb_cnt - 1); - dbg_msg("index LEBs: %d", c->lst.idx_lebs); - dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", - c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); - dbg_msg("key hash type: %d", c->key_hash_type); - dbg_msg("tree fanout: %d", c->fanout); - dbg_msg("reserved GC LEB: %d", c->gc_lnum); - dbg_msg("first main LEB: %d", c->main_first); - dbg_msg("max. znode size %d", c->max_znode_sz); - dbg_msg("max. index node size %d", c->max_idx_node_sz); - dbg_msg("node sizes: data %zu, inode %zu, dentry %zu", + dbg_gen("index LEBs: %d", c->lst.idx_lebs); + dbg_gen("total index bytes: %lld (%lld KiB, %lld MiB)", + c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, + c->bi.old_idx_sz >> 20); + dbg_gen("key hash type: %d", c->key_hash_type); + dbg_gen("tree fanout: %d", c->fanout); + dbg_gen("reserved GC LEB: %d", c->gc_lnum); + dbg_gen("max. znode size %d", c->max_znode_sz); + dbg_gen("max. index node size %d", c->max_idx_node_sz); + dbg_gen("node sizes: data %zu, inode %zu, dentry %zu", UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); - dbg_msg("node sizes: trun %zu, sb %zu, master %zu", + dbg_gen("node sizes: trun %zu, sb %zu, master %zu", UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); - dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", + dbg_gen("node sizes: ref %zu, cmt. start %zu, orph %zu", UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); - dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", + dbg_gen("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, - UBIFS_MAX_DENT_NODE_SZ); - dbg_msg("dead watermark: %d", c->dead_wm); - dbg_msg("dark watermark: %d", c->dark_wm); - dbg_msg("LEB overhead: %d", c->leb_overhead); + UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); + dbg_gen("dead watermark: %d", c->dead_wm); + dbg_gen("dark watermark: %d", c->dark_wm); + dbg_gen("LEB overhead: %d", c->leb_overhead); x = (long long)c->main_lebs * c->dark_wm; - dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", + dbg_gen("max. dark space: %lld (%lld KiB, %lld MiB)", x, x >> 10, x >> 20); - dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", + dbg_gen("maximum bud bytes: %lld (%lld KiB, %lld MiB)", c->max_bud_bytes, c->max_bud_bytes >> 10, c->max_bud_bytes >> 20); - dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", + dbg_gen("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", c->bg_bud_bytes, c->bg_bud_bytes >> 10, c->bg_bud_bytes >> 20); - dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", + dbg_gen("current bud bytes %lld (%lld KiB, %lld MiB)", c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); - dbg_msg("max. seq. number: %llu", c->max_sqnum); - dbg_msg("commit number: %llu", c->cmt_no); + dbg_gen("max. seq. number: %llu", c->max_sqnum); + dbg_gen("commit number: %llu", c->cmt_no); return 0; @@ -797,7 +1679,10 @@ out_infos: spin_unlock(&ubifs_infos_lock); out_orphans: free_orphans(c); +#ifndef __UBOOT__ out_journal: + destroy_journal(c); +#endif out_lpt: ubifs_lpt_free(c, 0); out_master: @@ -805,8 +1690,15 @@ out_master: kfree(c->rcvrd_mst_node); if (c->bgt) kthread_stop(c->bgt); +#ifndef __UBOOT__ +out_wbufs: +#endif + free_wbufs(c); +out_cbuf: kfree(c->cbuf); out_free: + kfree(c->write_reserve_buf); + kfree(c->bu.buf); vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); @@ -823,57 +1715,447 @@ out_free: * through mounting (error path cleanup function). So it has to make sure the * resource was actually allocated before freeing it. */ +#ifndef __UBOOT__ +static void ubifs_umount(struct ubifs_info *c) +#else void ubifs_umount(struct ubifs_info *c) +#endif { dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, c->vi.vol_id); + dbg_debugfs_exit_fs(c); spin_lock(&ubifs_infos_lock); list_del(&c->infos_list); spin_unlock(&ubifs_infos_lock); +#ifndef __UBOOT__ if (c->bgt) kthread_stop(c->bgt); + destroy_journal(c); +#endif + free_wbufs(c); free_orphans(c); ubifs_lpt_free(c, 0); kfree(c->cbuf); kfree(c->rcvrd_mst_node); kfree(c->mst_node); + kfree(c->write_reserve_buf); + kfree(c->bu.buf); vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); ubifs_debugging_exit(c); - +#ifdef __UBOOT__ /* Finally free U-Boot's global copy of superblock */ if (ubifs_sb != NULL) { free(ubifs_sb->s_fs_info); free(ubifs_sb); } +#endif +} + +#ifndef __UBOOT__ +/** + * ubifs_remount_rw - re-mount in read-write mode. + * @c: UBIFS file-system description object + * + * UBIFS avoids allocating many unnecessary resources when mounted in read-only + * mode. This function allocates the needed resources and re-mounts UBIFS in + * read-write mode. + */ +static int ubifs_remount_rw(struct ubifs_info *c) +{ + int err, lnum; + + if (c->rw_incompat) { + ubifs_err("the file-system is not R/W-compatible"); + ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", + c->fmt_version, c->ro_compat_version, + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); + return -EROFS; + } + + mutex_lock(&c->umount_mutex); + dbg_save_space_info(c); + c->remounting_rw = 1; + c->ro_mount = 0; + + if (c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + return err; + } + + err = check_free_space(c); + if (err) + goto out; + + if (c->old_leb_cnt != c->leb_cnt) { + struct ubifs_sb_node *sup; + + sup = ubifs_read_sb_node(c); + if (IS_ERR(sup)) { + err = PTR_ERR(sup); + goto out; + } + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + err = ubifs_write_sb_node(c, sup); + kfree(sup); + if (err) + goto out; + } + + if (c->need_recovery) { + ubifs_msg("completing deferred recovery"); + err = ubifs_write_rcvrd_mst_node(c); + if (err) + goto out; + err = ubifs_recover_size(c); + if (err) + goto out; + err = ubifs_clean_lebs(c, c->sbuf); + if (err) + goto out; + err = ubifs_recover_inl_heads(c, c->sbuf); + if (err) + goto out; + } else { + /* A readonly mount is not allowed to have orphans */ + ubifs_assert(c->tot_orphans == 0); + err = ubifs_clear_orphans(c); + if (err) + goto out; + } + + if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) { + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = ubifs_write_master(c); + if (err) + goto out; + } + + c->ileb_buf = vmalloc(c->leb_size); + if (!c->ileb_buf) { + err = -ENOMEM; + goto out; + } + + c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL); + if (!c->write_reserve_buf) { + err = -ENOMEM; + goto out; + } + + err = ubifs_lpt_init(c, 0, 1); + if (err) + goto out; + + /* Create background thread */ + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); + if (IS_ERR(c->bgt)) { + err = PTR_ERR(c->bgt); + c->bgt = NULL; + ubifs_err("cannot spawn \"%s\", error %d", + c->bgt_name, err); + goto out; + } + wake_up_process(c->bgt); + + c->orph_buf = vmalloc(c->leb_size); + if (!c->orph_buf) { + err = -ENOMEM; + goto out; + } + + /* Check for enough log space */ + lnum = c->lhead_lnum + 1; + if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) + lnum = UBIFS_LOG_LNUM; + if (lnum == c->ltail_lnum) { + err = ubifs_consolidate_log(c); + if (err) + goto out; + } + + if (c->need_recovery) + err = ubifs_rcvry_gc_commit(c); + else + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + goto out; + + dbg_gen("re-mounted read-write"); + c->remounting_rw = 0; + + if (c->need_recovery) { + c->need_recovery = 0; + ubifs_msg("deferred recovery completed"); + } else { + /* + * Do not run the debugging space check if the were doing + * recovery, because when we saved the information we had the + * file-system in a state where the TNC and lprops has been + * modified in memory, but all the I/O operations (including a + * commit) were deferred. So the file-system was in + * "non-committed" state. Now the file-system is in committed + * state, and of course the amount of free space will change + * because, for example, the old index size was imprecise. + */ + err = dbg_check_space_info(c); + } + + mutex_unlock(&c->umount_mutex); + return err; + +out: + c->ro_mount = 1; + vfree(c->orph_buf); + c->orph_buf = NULL; + if (c->bgt) { + kthread_stop(c->bgt); + c->bgt = NULL; + } + free_wbufs(c); + kfree(c->write_reserve_buf); + c->write_reserve_buf = NULL; + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); + c->remounting_rw = 0; + mutex_unlock(&c->umount_mutex); + return err; +} + +/** + * ubifs_remount_ro - re-mount in read-only mode. + * @c: UBIFS file-system description object + * + * We assume VFS has stopped writing. Possibly the background thread could be + * running a commit, however kthread_stop will wait in that case. + */ +static void ubifs_remount_ro(struct ubifs_info *c) +{ + int i, err; + + ubifs_assert(!c->need_recovery); + ubifs_assert(!c->ro_mount); + + mutex_lock(&c->umount_mutex); + if (c->bgt) { + kthread_stop(c->bgt); + c->bgt = NULL; + } + + dbg_save_space_info(c); + + for (i = 0; i < c->jhead_cnt; i++) + ubifs_wbuf_sync(&c->jheads[i].wbuf); + + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); + err = ubifs_write_master(c); + if (err) + ubifs_ro_mode(c, err); + + vfree(c->orph_buf); + c->orph_buf = NULL; + kfree(c->write_reserve_buf); + c->write_reserve_buf = NULL; + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); + c->ro_mount = 1; + err = dbg_check_space_info(c); + if (err) + ubifs_ro_mode(c, err); + mutex_unlock(&c->umount_mutex); +} + +static void ubifs_put_super(struct super_block *sb) +{ + int i; + struct ubifs_info *c = sb->s_fs_info; + + ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, + c->vi.vol_id); + + /* + * The following asserts are only valid if there has not been a failure + * of the media. For example, there will be dirty inodes if we failed + * to write them back because of I/O errors. + */ + if (!c->ro_error) { + ubifs_assert(c->bi.idx_growth == 0); + ubifs_assert(c->bi.dd_growth == 0); + ubifs_assert(c->bi.data_growth == 0); + } + + /* + * The 'c->umount_lock' prevents races between UBIFS memory shrinker + * and file system un-mount. Namely, it prevents the shrinker from + * picking this superblock for shrinking - it will be just skipped if + * the mutex is locked. + */ + mutex_lock(&c->umount_mutex); + if (!c->ro_mount) { + /* + * First of all kill the background thread to make sure it does + * not interfere with un-mounting and freeing resources. + */ + if (c->bgt) { + kthread_stop(c->bgt); + c->bgt = NULL; + } + + /* + * On fatal errors c->ro_error is set to 1, in which case we do + * not write the master node. + */ + if (!c->ro_error) { + int err; + + /* Synchronize write-buffers */ + for (i = 0; i < c->jhead_cnt; i++) + ubifs_wbuf_sync(&c->jheads[i].wbuf); + + /* + * We are being cleanly unmounted which means the + * orphans were killed - indicate this in the master + * node. Also save the reserved GC LEB number. + */ + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); + err = ubifs_write_master(c); + if (err) + /* + * Recovery will attempt to fix the master area + * next mount, so we just print a message and + * continue to unmount normally. + */ + ubifs_err("failed to write master node, error %d", + err); + } else { +#ifndef __UBOOT__ + for (i = 0; i < c->jhead_cnt; i++) + /* Make sure write-buffer timers are canceled */ + hrtimer_cancel(&c->jheads[i].wbuf.timer); +#endif + } + } + + ubifs_umount(c); +#ifndef __UBOOT__ + bdi_destroy(&c->bdi); +#endif + ubi_close_volume(c->ubi); + mutex_unlock(&c->umount_mutex); +} +#endif + +#ifndef __UBOOT__ +static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) +{ + int err; + struct ubifs_info *c = sb->s_fs_info; + + dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); + + err = ubifs_parse_options(c, data, 1); + if (err) { + ubifs_err("invalid or unknown remount parameter"); + return err; + } + + if (c->ro_mount && !(*flags & MS_RDONLY)) { + if (c->ro_error) { + ubifs_msg("cannot re-mount R/W due to prior errors"); + return -EROFS; + } + if (c->ro_media) { + ubifs_msg("cannot re-mount R/W - UBI volume is R/O"); + return -EROFS; + } + err = ubifs_remount_rw(c); + if (err) + return err; + } else if (!c->ro_mount && (*flags & MS_RDONLY)) { + if (c->ro_error) { + ubifs_msg("cannot re-mount R/O due to prior errors"); + return -EROFS; + } + ubifs_remount_ro(c); + } + + if (c->bulk_read == 1) + bu_init(c); + else { + dbg_gen("disable bulk-read"); + kfree(c->bu.buf); + c->bu.buf = NULL; + } + + ubifs_assert(c->lst.taken_empty_lebs > 0); + return 0; } +#endif + +const struct super_operations ubifs_super_operations = { + .alloc_inode = ubifs_alloc_inode, +#ifndef __UBOOT__ + .destroy_inode = ubifs_destroy_inode, + .put_super = ubifs_put_super, + .write_inode = ubifs_write_inode, + .evict_inode = ubifs_evict_inode, + .statfs = ubifs_statfs, +#endif + .dirty_inode = ubifs_dirty_inode, +#ifndef __UBOOT__ + .remount_fs = ubifs_remount_fs, + .show_options = ubifs_show_options, + .sync_fs = ubifs_sync_fs, +#endif +}; /** * open_ubi - parse UBI device name string and open the UBI device. * @name: UBI volume name * @mode: UBI volume open mode * - * There are several ways to specify UBI volumes when mounting UBIFS: - * o ubiX_Y - UBI device number X, volume Y; - * o ubiY - UBI device number 0, volume Y; + * The primary method of mounting UBIFS is by specifying the UBI volume + * character device node path. However, UBIFS may also be mounted withoug any + * character device node using one of the following methods: + * + * o ubiX_Y - mount UBI device number X, volume Y; + * o ubiY - mount UBI device number 0, volume Y; * o ubiX:NAME - mount UBI device X, volume with name NAME; * o ubi:NAME - mount UBI device 0, volume with name NAME. * * Alternative '!' separator may be used instead of ':' (because some shells * like busybox may interpret ':' as an NFS host name separator). This function - * returns ubi volume object in case of success and a negative error code in - * case of failure. + * returns UBI volume description object in case of success and a negative + * error code in case of failure. */ static struct ubi_volume_desc *open_ubi(const char *name, int mode) { +#ifndef __UBOOT__ + struct ubi_volume_desc *ubi; +#endif int dev, vol; char *endptr; +#ifndef __UBOOT__ + /* First, try to open using the device node path method */ + ubi = ubi_open_volume_path(name, mode); + if (!IS_ERR(ubi)) + return ubi; +#endif + + /* Try the "nodev" method */ if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') return ERR_PTR(-EINVAL); @@ -905,78 +2187,100 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) return ERR_PTR(-EINVAL); } -static int ubifs_fill_super(struct super_block *sb, void *data, int silent) +static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) { - struct ubi_volume_desc *ubi = sb->s_fs_info; struct ubifs_info *c; - struct inode *root; - int err; c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); - if (!c) - return -ENOMEM; + if (c) { + spin_lock_init(&c->cnt_lock); + spin_lock_init(&c->cs_lock); + spin_lock_init(&c->buds_lock); + spin_lock_init(&c->space_lock); + spin_lock_init(&c->orphan_lock); + init_rwsem(&c->commit_sem); + mutex_init(&c->lp_mutex); + mutex_init(&c->tnc_mutex); + mutex_init(&c->log_mutex); + mutex_init(&c->mst_mutex); + mutex_init(&c->umount_mutex); + mutex_init(&c->bu_mutex); + mutex_init(&c->write_reserve_mutex); + init_waitqueue_head(&c->cmt_wq); + c->buds = RB_ROOT; + c->old_idx = RB_ROOT; + c->size_tree = RB_ROOT; + c->orph_tree = RB_ROOT; + INIT_LIST_HEAD(&c->infos_list); + INIT_LIST_HEAD(&c->idx_gc); + INIT_LIST_HEAD(&c->replay_list); + INIT_LIST_HEAD(&c->replay_buds); + INIT_LIST_HEAD(&c->uncat_list); + INIT_LIST_HEAD(&c->empty_list); + INIT_LIST_HEAD(&c->freeable_list); + INIT_LIST_HEAD(&c->frdi_idx_list); + INIT_LIST_HEAD(&c->unclean_leb_list); + INIT_LIST_HEAD(&c->old_buds); + INIT_LIST_HEAD(&c->orph_list); + INIT_LIST_HEAD(&c->orph_new); + c->no_chk_data_crc = 1; + + c->highest_inum = UBIFS_FIRST_INO; + c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; + + ubi_get_volume_info(ubi, &c->vi); + ubi_get_device_info(c->vi.ubi_num, &c->di); + } + return c; +} - spin_lock_init(&c->cnt_lock); - spin_lock_init(&c->cs_lock); - spin_lock_init(&c->buds_lock); - spin_lock_init(&c->space_lock); - spin_lock_init(&c->orphan_lock); - init_rwsem(&c->commit_sem); - mutex_init(&c->lp_mutex); - mutex_init(&c->tnc_mutex); - mutex_init(&c->log_mutex); - mutex_init(&c->mst_mutex); - mutex_init(&c->umount_mutex); - init_waitqueue_head(&c->cmt_wq); - c->buds = RB_ROOT; - c->old_idx = RB_ROOT; - c->size_tree = RB_ROOT; - c->orph_tree = RB_ROOT; - INIT_LIST_HEAD(&c->infos_list); - INIT_LIST_HEAD(&c->idx_gc); - INIT_LIST_HEAD(&c->replay_list); - INIT_LIST_HEAD(&c->replay_buds); - INIT_LIST_HEAD(&c->uncat_list); - INIT_LIST_HEAD(&c->empty_list); - INIT_LIST_HEAD(&c->freeable_list); - INIT_LIST_HEAD(&c->frdi_idx_list); - INIT_LIST_HEAD(&c->unclean_leb_list); - INIT_LIST_HEAD(&c->old_buds); - INIT_LIST_HEAD(&c->orph_list); - INIT_LIST_HEAD(&c->orph_new); - - c->highest_inum = UBIFS_FIRST_INO; - c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; - - ubi_get_volume_info(ubi, &c->vi); - ubi_get_device_info(c->vi.ubi_num, &c->di); +static int ubifs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct ubifs_info *c = sb->s_fs_info; + struct inode *root; + int err; + c->vfs_sb = sb; /* Re-open the UBI device in read-write mode */ - c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READONLY); + c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); if (IS_ERR(c->ubi)) { err = PTR_ERR(c->ubi); - goto out_free; + goto out; } - c->vfs_sb = sb; +#ifndef __UBOOT__ + /* + * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For + * UBIFS, I/O is not deferred, it is done immediately in readpage, + * which means the user would have to wait not just for their own I/O + * but the read-ahead I/O as well i.e. completely pointless. + * + * Read-ahead will be disabled because @c->bdi.ra_pages is 0. + */ + co>bdi.name = "ubifs", + c->bdi.capabilities = BDI_CAP_MAP_COPY; + err = bdi_init(&c->bdi); + if (err) + goto out_close; + err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d", + c->vi.ubi_num, c->vi.vol_id); + if (err) + goto out_bdi; + + err = ubifs_parse_options(c, data, 0); + if (err) + goto out_bdi; + sb->s_bdi = &c->bdi; +#endif sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; sb->s_blocksize = UBIFS_BLOCK_SIZE; sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; - sb->s_dev = c->vi.cdev; sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); if (c->max_inode_sz > MAX_LFS_FILESIZE) sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; - - if (c->rw_incompat) { - ubifs_err("the file-system is not R/W-compatible"); - ubifs_msg("on-flash format version is w%d/r%d, but software " - "only supports up to version w%d/r%d", c->fmt_version, - c->ro_compat_version, UBIFS_FORMAT_VERSION, - UBIFS_RO_COMPAT_VERSION); - return -EROFS; - } + sb->s_op = &ubifs_super_operations; mutex_lock(&c->umount_mutex); err = mount_ubifs(c); @@ -992,7 +2296,15 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out_umount; } +#ifndef __UBOOT__ + sb->s_root = d_make_root(root); + if (!sb->s_root) { + err = -ENOMEM; + goto out_umount; + } +#else sb->s_root = NULL; +#endif mutex_unlock(&c->umount_mutex); return 0; @@ -1001,24 +2313,130 @@ out_umount: ubifs_umount(c); out_unlock: mutex_unlock(&c->umount_mutex); +#ifndef __UBOOT__ +out_bdi: + bdi_destroy(&c->bdi); +out_close: +#endif ubi_close_volume(c->ubi); -out_free: - kfree(c); +out: return err; } static int sb_test(struct super_block *sb, void *data) { - dev_t *dev = data; + struct ubifs_info *c1 = data; + struct ubifs_info *c = sb->s_fs_info; - return sb->s_dev == *dev; + return c->vi.cdev == c1->vi.cdev; } -static int ubifs_get_sb(struct file_system_type *fs_type, int flags, - const char *name, void *data, struct vfsmount *mnt) +static int sb_set(struct super_block *sb, void *data) +{ + sb->s_fs_info = data; + return set_anon_super(sb, NULL); +} + +static struct super_block *alloc_super(struct file_system_type *type, int flags) +{ + struct super_block *s; + int err; + + s = kzalloc(sizeof(struct super_block), GFP_USER); + if (!s) { + err = -ENOMEM; + return ERR_PTR(err); + } + + INIT_HLIST_NODE(&s->s_instances); + INIT_LIST_HEAD(&s->s_inodes); + s->s_time_gran = 1000000000; + s->s_flags = flags; + + return s; +} + +/** + * sget - find or create a superblock + * @type: filesystem type superblock should belong to + * @test: comparison callback + * @set: setup callback + * @flags: mount flags + * @data: argument to each of them + */ +struct super_block *sget(struct file_system_type *type, + int (*test)(struct super_block *,void *), + int (*set)(struct super_block *,void *), + int flags, + void *data) +{ + struct super_block *s = NULL; +#ifndef __UBOOT__ + struct super_block *old; +#endif + int err; + +#ifndef __UBOOT__ +retry: + spin_lock(&sb_lock); + if (test) { + hlist_for_each_entry(old, &type->fs_supers, s_instances) { + if (!test(old, data)) + continue; + if (!grab_super(old)) + goto retry; + if (s) { + up_write(&s->s_umount); + destroy_super(s); + s = NULL; + } + return old; + } + } +#endif + if (!s) { + spin_unlock(&sb_lock); + s = alloc_super(type, flags); + if (!s) + return ERR_PTR(-ENOMEM); +#ifndef __UBOOT__ + goto retry; +#endif + } + + err = set(s, data); + if (err) { +#ifndef __UBOOT__ + spin_unlock(&sb_lock); + up_write(&s->s_umount); + destroy_super(s); +#endif + return ERR_PTR(err); + } + s->s_type = type; +#ifndef __UBOOT__ + strlcpy(s->s_id, type->name, sizeof(s->s_id)); +#else + strncpy(s->s_id, type->name, sizeof(s->s_id)); +#endif + list_add_tail(&s->s_list, &super_blocks); + hlist_add_head(&s->s_instances, &type->fs_supers); +#ifndef __UBOOT__ + spin_unlock(&sb_lock); + get_filesystem(type); + register_shrinker(&s->s_shrink); +#endif + return s; +} + +EXPORT_SYMBOL(sget); + + +static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, + const char *name, void *data) { struct ubi_volume_desc *ubi; - struct ubi_volume_info vi; + struct ubifs_info *c; struct super_block *sb; int err; @@ -1033,32 +2451,34 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, if (IS_ERR(ubi)) { ubifs_err("cannot open \"%s\", error %d", name, (int)PTR_ERR(ubi)); - return PTR_ERR(ubi); + return ERR_CAST(ubi); + } + + c = alloc_ubifs_info(ubi); + if (!c) { + err = -ENOMEM; + goto out_close; } - ubi_get_volume_info(ubi, &vi); - dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); + dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); + sb = sget(fs_type, sb_test, sb_set, flags, c); if (IS_ERR(sb)) { err = PTR_ERR(sb); + kfree(c); goto out_close; } if (sb->s_root) { + struct ubifs_info *c1 = sb->s_fs_info; + kfree(c); /* A new mount point for already mounted UBIFS */ dbg_gen("this ubi volume is already mounted"); - if ((flags ^ sb->s_flags) & MS_RDONLY) { + if (!!(flags & MS_RDONLY) != c1->ro_mount) { err = -EBUSY; goto out_deact; } } else { - sb->s_flags = flags; - /* - * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is - * replaced by 'c'. - */ - sb->s_fs_info = ubi; err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) goto out_deact; @@ -1069,17 +2489,53 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, /* 'fill_super()' opens ubi again so we must close it here */ ubi_close_volume(ubi); +#ifdef __UBOOT__ ubifs_sb = sb; return 0; +#else + return dget(sb->s_root); +#endif out_deact: - up_write(&sb->s_umount); +#ifndef __UBOOT__ + deactivate_locked_super(sb); +#endif out_close: ubi_close_volume(ubi); - return err; + return ERR_PTR(err); +} + +static void kill_ubifs_super(struct super_block *s) +{ + struct ubifs_info *c = s->s_fs_info; +#ifndef __UBOOT__ + kill_anon_super(s); +#endif + kfree(c); +} + +static struct file_system_type ubifs_fs_type = { + .name = "ubifs", + .owner = THIS_MODULE, + .mount = ubifs_mount, + .kill_sb = kill_ubifs_super, +}; +#ifndef __UBOOT__ +MODULE_ALIAS_FS("ubifs"); + +/* + * Inode slab cache constructor. + */ +static void inode_slab_ctor(void *obj) +{ + struct ubifs_inode *ui = obj; + inode_init_once(&ui->vfs_inode); } -int __init ubifs_init(void) +static int __init ubifs_init(void) +#else +int ubifs_init(void) +#endif { int err; @@ -1135,41 +2591,84 @@ int __init ubifs_init(void) * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. */ if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { - ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" - " at least 4096 bytes", + ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes", (unsigned int)PAGE_CACHE_SIZE); return -EINVAL; } - err = -ENOMEM; +#ifndef __UBOOT__ + ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", + sizeof(struct ubifs_inode), 0, + SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, + &inode_slab_ctor); + if (!ubifs_inode_slab) + return -ENOMEM; + + register_shrinker(&ubifs_shrinker_info); +#endif err = ubifs_compressors_init(); if (err) goto out_shrinker; +#ifndef __UBOOT__ + err = dbg_debugfs_init(); + if (err) + goto out_compr; + + err = register_filesystem(&ubifs_fs_type); + if (err) { + ubifs_err("cannot register file system, error %d", err); + goto out_dbg; + } +#endif return 0; +#ifndef __UBOOT__ +out_dbg: + dbg_debugfs_exit(); +out_compr: + ubifs_compressors_exit(); +#endif out_shrinker: +#ifndef __UBOOT__ + unregister_shrinker(&ubifs_shrinker_info); +#endif + kmem_cache_destroy(ubifs_inode_slab); return err; } +/* late_initcall to let compressors initialize first */ +late_initcall(ubifs_init); -/* - * ubifsmount... - */ +#ifndef __UBOOT__ +static void __exit ubifs_exit(void) +{ + ubifs_assert(list_empty(&ubifs_infos)); + ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0); -static struct file_system_type ubifs_fs_type = { - .name = "ubifs", - .owner = THIS_MODULE, - .get_sb = ubifs_get_sb, -}; + dbg_debugfs_exit(); + ubifs_compressors_exit(); + unregister_shrinker(&ubifs_shrinker_info); -int ubifs_mount(char *name) + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(ubifs_inode_slab); + unregister_filesystem(&ubifs_fs_type); +} +module_exit(ubifs_exit); + +MODULE_LICENSE("GPL"); +MODULE_VERSION(__stringify(UBIFS_VERSION)); +MODULE_AUTHOR("Artem Bityutskiy, Adrian Hunter"); +MODULE_DESCRIPTION("UBIFS - UBI File System"); +#else +int uboot_ubifs_mount(char *vol_name) { + struct dentry *ret; int flags; - void *data; - struct vfsmount *mnt; - int ret; - struct ubifs_info *c; /* * First unmount if allready mounted @@ -1177,23 +2676,17 @@ int ubifs_mount(char *name) if (ubifs_sb) ubifs_umount(ubifs_sb->s_fs_info); - INIT_LIST_HEAD(&ubifs_infos); - INIT_LIST_HEAD(&ubifs_fs_type.fs_supers); - /* * Mount in read-only mode */ flags = MS_RDONLY; - data = NULL; - mnt = NULL; - ret = ubifs_get_sb(&ubifs_fs_type, flags, name, data, mnt); - if (ret) { - ubifs_err("Error reading superblock on volume '%s' errno=%d!\n", name, ret); + ret = ubifs_mount(&ubifs_fs_type, flags, vol_name, NULL); + if (IS_ERR(ret)) { + printf("Error reading superblock on volume '%s' " \ + "errno=%d!\n", vol_name, (int)PTR_ERR(ret)); return -1; } - c = ubifs_sb->s_fs_info; - ubi_close_volume(c->ubi); - return 0; } +#endif diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index ccda938..eda5070 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) @@ -30,6 +19,15 @@ * the mutex locked. */ +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/crc32.h> +#include <linux/slab.h> +#else +#include <linux/compat.h> +#include <linux/err.h> +#include <linux/stat.h> +#endif #include "ubifs.h" /* @@ -176,27 +174,11 @@ static int ins_clr_old_idx_znode(struct ubifs_info *c, */ void destroy_old_idx(struct ubifs_info *c) { - struct rb_node *this = c->old_idx.rb_node; - struct ubifs_old_idx *old_idx; + struct ubifs_old_idx *old_idx, *n; - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - old_idx = rb_entry(this, struct ubifs_old_idx, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &old_idx->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } + rbtree_postorder_for_each_entry_safe(old_idx, n, &c->old_idx, rb) kfree(old_idx); - } + c->old_idx = RB_ROOT; } @@ -221,7 +203,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags); - ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_obsolete(znode)); __set_bit(OBSOLETE_ZNODE, &znode->flags); if (znode->level != 0) { @@ -269,7 +251,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, struct ubifs_znode *zn; int err; - if (!test_bit(COW_ZNODE, &znode->flags)) { + if (!ubifs_zn_cow(znode)) { /* znode is not being committed */ if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { atomic_long_inc(&c->dirty_zn_cnt); @@ -337,17 +319,16 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = ubifs_validate_entry(c, dent); if (err) { - dbg_dump_stack(); - dbg_dump_node(c, dent); + dump_stack(); + ubifs_dump_node(c, dent); return err; } - lnc_node = kmalloc(zbr->len, GFP_NOFS); + lnc_node = kmemdup(node, zbr->len, GFP_NOFS); if (!lnc_node) /* We don't have to have the cache, so no error */ return 0; - memcpy(lnc_node, node, zbr->len); zbr->leaf = lnc_node; return 0; } @@ -371,8 +352,8 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = ubifs_validate_entry(c, node); if (err) { - dbg_dump_stack(); - dbg_dump_node(c, node); + dump_stack(); + ubifs_dump_node(c, node); return err; } @@ -445,8 +426,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, * * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc * is true (it is controlled by corresponding mount option). However, if - * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always - * checked. + * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to + * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is + * because during mounting or re-mounting from R/O mode to R/W mode we may read + * journal nodes (when replying the journal or doing the recovery) and the + * journal nodes may potentially be corrupted, so checking is required. */ static int try_read_node(const struct ubifs_info *c, void *buf, int type, int len, int lnum, int offs) @@ -457,7 +441,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); - err = ubi_read(c->ubi, lnum, buf, offs, len); + err = ubifs_leb_read(c, lnum, buf, offs, len, 1); if (err) { ubifs_err("cannot read node type %d from LEB %d:%d, error %d", type, lnum, offs, err); @@ -474,7 +458,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, if (node_len != len) return 0; - if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) + if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting && + !c->remounting_rw) return 1; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); @@ -500,7 +485,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, { int ret; - dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); + dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, zbr->offs); ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, zbr->offs); @@ -514,8 +499,8 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, ret = 0; } if (ret == 0 && c->replaying) - dbg_mnt("dangling branch LEB %d:%d len %d, key %s", - zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); + dbg_mntk(key, "dangling branch LEB %d:%d len %d, key ", + zbr->lnum, zbr->offs, zbr->len); return ret; } @@ -990,9 +975,9 @@ static int fallible_resolve_collision(struct ubifs_info *c, if (adding || !o_znode) return 0; - dbg_mnt("dangling match LEB %d:%d len %d %s", + dbg_mntk(key, "dangling match LEB %d:%d len %d key ", o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, - o_znode->zbranch[o_n].len, DBGKEY(key)); + o_znode->zbranch[o_n].len); *zn = o_znode; *n = o_n; return 1; @@ -1158,8 +1143,8 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, * o exact match, i.e. the found zero-level znode contains key @key, then %1 * is returned and slot number of the matched branch is stored in @n; * o not exact match, which means that zero-level znode does not contain - * @key, then %0 is returned and slot number of the closed branch is stored - * in @n; + * @key, then %0 is returned and slot number of the closest branch is stored + * in @n; * o @key is so small that it is even less than the lowest key of the * leftmost zero-level node, then %0 is returned and %0 is stored in @n. * @@ -1174,7 +1159,8 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; unsigned long time = get_seconds(); - dbg_tnc("search key %s", DBGKEY(key)); + dbg_tnck(key, "search key "); + ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); znode = c->zroot.znode; if (unlikely(!znode)) { @@ -1251,7 +1237,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, * splitting in the middle of the colliding sequence. Also, when * removing the leftmost key, we would have to correct the key of the * parent node, which would introduce additional complications. Namely, - * if we changed the the leftmost key of the parent znode, the garbage + * if we changed the leftmost key of the parent znode, the garbage * collector would be unable to find it (GC is doing this when GC'ing * indexing LEBs). Although we already have an additional RB-tree where * we save such changed znodes (see 'ins_clr_old_idx_znode()') until @@ -1309,7 +1295,7 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; unsigned long time = get_seconds(); - dbg_tnc("search and dirty key %s", DBGKEY(key)); + dbg_tnck(key, "search and dirty key "); znode = c->zroot.znode; if (unlikely(!znode)) { @@ -1400,9 +1386,31 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, */ static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) { +#ifndef __UBOOT__ + int gc_seq2, gced_lnum; + + gced_lnum = c->gced_lnum; + smp_rmb(); + gc_seq2 = c->gc_seq; + /* Same seq means no GC */ + if (gc_seq1 == gc_seq2) + return 0; + /* Different by more than 1 means we don't know */ + if (gc_seq1 + 1 != gc_seq2) + return 1; /* - * No garbage collection in the read-only U-Boot implementation + * We have seen the sequence number has increased by 1. Now we need to + * be sure we read the right LEB number, so read it again. */ + smp_rmb(); + if (gced_lnum != c->gced_lnum) + return 1; + /* Finally we can check lnum */ + if (gced_lnum == lnum) + return 1; +#else + /* No garbage collection in the read-only U-Boot implementation */ +#endif return 0; } @@ -1414,7 +1422,7 @@ static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) * @lnum: LEB number is returned here * @offs: offset is returned here * - * This function look up and reads node with key @key. The caller has to make + * This function looks up and reads node with key @key. The caller has to make * sure the @node buffer is large enough to fit the node. Returns zero in case * of success, %-ENOENT if the node was not found, and a negative error code in * case of failure. The node location can be returned in @lnum and @offs. @@ -1458,6 +1466,12 @@ again: gc_seq1 = c->gc_seq; mutex_unlock(&c->tnc_mutex); + if (ubifs_get_wbuf(c, zbr.lnum)) { + /* We do not GC journal heads */ + err = ubifs_tnc_read_node(c, &zbr, node); + return err; + } + err = fallible_read_node(c, key, &zbr, node); if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { /* @@ -1610,6 +1624,51 @@ out: } /** + * read_wbuf - bulk-read from a LEB with a wbuf. + * @wbuf: wbuf that may overlap the read + * @buf: buffer into which to read + * @len: read length + * @lnum: LEB number from which to read + * @offs: offset from which to read + * + * This functions returns %0 on success or a negative error code on failure. + */ +static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, + int offs) +{ + const struct ubifs_info *c = wbuf->c; + int rlen, overlap; + + dbg_io("LEB %d:%d, length %d", lnum, offs, len); + ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + ubifs_assert(offs + len <= c->leb_size); + + spin_lock(&wbuf->lock); + overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); + if (!overlap) { + /* We may safely unlock the write-buffer and read the data */ + spin_unlock(&wbuf->lock); + return ubifs_leb_read(c, lnum, buf, offs, len, 0); + } + + /* Don't read under wbuf */ + rlen = wbuf->offs - offs; + if (rlen < 0) + rlen = 0; + + /* Copy the rest from the write-buffer */ + memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); + spin_unlock(&wbuf->lock); + + if (rlen > 0) + /* Read everything that goes before write-buffer */ + return ubifs_leb_read(c, lnum, buf, offs, rlen, 0); + + return 0; +} + +/** * validate_data_node - validate data nodes for bulk-read. * @c: UBIFS file-system description object * @buf: buffer containing data node to validate @@ -1647,8 +1706,8 @@ static int validate_data_node(struct ubifs_info *c, void *buf, if (!keys_eq(c, &zbr->key, &key1)) { ubifs_err("bad key in node at LEB %d:%d", zbr->lnum, zbr->offs); - dbg_tnc("looked for key %s found node's key %s", - DBGKEY(&zbr->key), DBGKEY1(&key1)); + dbg_tnck(&zbr->key, "looked for key "); + dbg_tnck(&key1, "found node's key "); goto out_err; } @@ -1658,8 +1717,8 @@ out_err: err = -EINVAL; out: ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); - dbg_dump_node(c, buf); - dbg_dump_stack(); + ubifs_dump_node(c, buf); + dump_stack(); return err; } @@ -1676,6 +1735,7 @@ out: int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) { int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i; + struct ubifs_wbuf *wbuf; void *buf; len = bu->zbranch[bu->cnt - 1].offs; @@ -1686,7 +1746,11 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) } /* Do the read */ - err = ubi_read(c->ubi, lnum, bu->buf, offs, len); + wbuf = ubifs_get_wbuf(c, lnum); + if (wbuf) + err = read_wbuf(wbuf, bu->buf, len, lnum, offs); + else + err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0); /* Check for a race with GC */ if (maybe_leb_gced(c, lnum, bu->gc_seq)) @@ -1695,8 +1759,8 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) if (err && err != -EBADMSG) { ubifs_err("failed to read from LEB %d:%d, error %d", lnum, offs, err); - dbg_dump_stack(); - dbg_tnc("key %s", DBGKEY(&bu->key)); + dump_stack(); + dbg_tnck(&bu->key, "key "); return err; } @@ -1731,7 +1795,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, int found, n, err; struct ubifs_znode *znode; - dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); + dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name); mutex_lock(&c->tnc_mutex); found = ubifs_lookup_level0(c, key, &znode, &n); if (!found) { @@ -1905,8 +1969,7 @@ again: zp = znode->parent; if (znode->child_cnt < c->fanout) { ubifs_assert(n != c->fanout); - dbg_tnc("inserted at %d level %d, key %s", n, znode->level, - DBGKEY(key)); + dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level); insert_zbranch(znode, zbr, n); @@ -1921,7 +1984,7 @@ again: * Unfortunately, @znode does not have more empty slots and we have to * split it. */ - dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); + dbg_tnck(key, "splitting level %d, key ", znode->level); if (znode->alt) /* @@ -2015,7 +2078,7 @@ do_split: } /* Insert new key and branch */ - dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); + dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level); insert_zbranch(zi, zbr, n); @@ -2091,7 +2154,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); + dbg_tnck(key, "%d:%d, len %d, key ", lnum, offs, len); found = lookup_level0_dirty(c, key, &znode, &n); if (!found) { struct ubifs_zbranch zbr; @@ -2140,8 +2203,8 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, - old_offs, lnum, offs, len, DBGKEY(key)); + dbg_tnck(key, "old LEB %d:%d, new LEB %d:%d, len %d, key ", old_lnum, + old_offs, lnum, offs, len); found = lookup_level0_dirty(c, key, &znode, &n); if (found < 0) { err = found; @@ -2223,8 +2286,8 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, - DBGKEY(key)); + dbg_tnck(key, "LEB %d:%d, name '%.*s', key ", + lnum, offs, nm->len, nm->name); found = lookup_level0_dirty(c, key, &znode, &n); if (found < 0) { err = found; @@ -2282,7 +2345,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, * by passing 'ubifs_tnc_remove_nm()' the same key but * an unmatchable name. */ - struct qstr noname = { .len = 0, .name = "" }; + struct qstr noname = { .name = "" }; err = dbg_check_tnc(c, 0); mutex_unlock(&c->tnc_mutex); @@ -2317,14 +2380,14 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) /* Delete without merge for now */ ubifs_assert(znode->level == 0); ubifs_assert(n >= 0 && n < c->fanout); - dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); + dbg_tnck(&znode->zbranch[n].key, "deleting key "); zbr = &znode->zbranch[n]; lnc_free(zbr); err = ubifs_add_dirt(c, zbr->lnum, zbr->len); if (err) { - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); return err; } @@ -2342,7 +2405,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) */ do { - ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_obsolete(znode)); ubifs_assert(ubifs_zn_dirty(znode)); zp = znode->parent; @@ -2398,9 +2461,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) c->zroot.offs = zbr->offs; c->zroot.len = zbr->len; c->zroot.znode = znode; - ubifs_assert(!test_bit(OBSOLETE_ZNODE, - &zp->flags)); - ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); + ubifs_assert(!ubifs_zn_obsolete(zp)); + ubifs_assert(ubifs_zn_dirty(zp)); atomic_long_dec(&c->dirty_zn_cnt); if (zp->cnext) { @@ -2428,7 +2490,7 @@ int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("key %s", DBGKEY(key)); + dbg_tnck(key, "key "); found = lookup_level0_dirty(c, key, &znode, &n); if (found < 0) { err = found; @@ -2459,7 +2521,7 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); + dbg_tnck(key, "%.*s, key ", nm->len, nm->name); err = lookup_level0_dirty(c, key, &znode, &n); if (err < 0) goto out_unlock; @@ -2476,11 +2538,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, if (err) { /* Ensure the znode is dirtied */ if (znode->cnext || !ubifs_zn_dirty(znode)) { - znode = dirty_cow_bottom_up(c, znode); - if (IS_ERR(znode)) { - err = PTR_ERR(znode); - goto out_unlock; - } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } } err = tnc_delete(c, znode, n); } @@ -2571,10 +2633,10 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, err = ubifs_add_dirt(c, znode->zbranch[i].lnum, znode->zbranch[i].len); if (err) { - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); goto out_unlock; } - dbg_tnc("removing %s", DBGKEY(key)); + dbg_tnck(key, "removing key "); } if (k) { for (i = n + 1 + k; i < znode->child_cnt; i++) @@ -2633,7 +2695,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) dbg_tnc("xent '%s', ino %lu", xent->name, (unsigned long)xattr_inum); - nm.name = (char *)xent->name; + nm.name = xent->name; nm.len = le16_to_cpu(xent->nlen); err = ubifs_tnc_remove_nm(c, &key1, &nm); if (err) { @@ -2694,7 +2756,7 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, struct ubifs_zbranch *zbr; union ubifs_key *dkey; - dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); + dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)"); ubifs_assert(is_hash_key(c, key)); mutex_lock(&c->tnc_mutex); @@ -2765,3 +2827,503 @@ out_unlock: mutex_unlock(&c->tnc_mutex); return ERR_PTR(err); } + +#ifndef __UBOOT__ +/** + * tnc_destroy_cnext - destroy left-over obsolete znodes from a failed commit. + * @c: UBIFS file-system description object + * + * Destroy left-over obsolete znodes from a failed commit. + */ +static void tnc_destroy_cnext(struct ubifs_info *c) +{ + struct ubifs_znode *cnext; + + if (!c->cnext) + return; + ubifs_assert(c->cmt_state == COMMIT_BROKEN); + cnext = c->cnext; + do { + struct ubifs_znode *znode = cnext; + + cnext = cnext->cnext; + if (ubifs_zn_obsolete(znode)) + kfree(znode); + } while (cnext && cnext != c->cnext); +} + +/** + * ubifs_tnc_close - close TNC subsystem and free all related resources. + * @c: UBIFS file-system description object + */ +void ubifs_tnc_close(struct ubifs_info *c) +{ + tnc_destroy_cnext(c); + if (c->zroot.znode) { + long n; + + ubifs_destroy_tnc_subtree(c->zroot.znode); + n = atomic_long_read(&c->clean_zn_cnt); + atomic_long_sub(n, &ubifs_clean_zn_cnt); + } + kfree(c->gap_lebs); + kfree(c->ilebs); + destroy_old_idx(c); +} +#endif + +/** + * left_znode - get the znode to the left. + * @c: UBIFS file-system description object + * @znode: znode + * + * This function returns a pointer to the znode to the left of @znode or NULL if + * there is not one. A negative error code is returned on failure. + */ +static struct ubifs_znode *left_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + int level = znode->level; + + while (1) { + int n = znode->iip - 1; + + /* Go up until we can go left */ + znode = znode->parent; + if (!znode) + return NULL; + if (n >= 0) { + /* Now go down the rightmost branch to 'level' */ + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + while (znode->level != level) { + n = znode->child_cnt - 1; + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + } + break; + } + } + return znode; +} + +/** + * right_znode - get the znode to the right. + * @c: UBIFS file-system description object + * @znode: znode + * + * This function returns a pointer to the znode to the right of @znode or NULL + * if there is not one. A negative error code is returned on failure. + */ +static struct ubifs_znode *right_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + int level = znode->level; + + while (1) { + int n = znode->iip + 1; + + /* Go up until we can go right */ + znode = znode->parent; + if (!znode) + return NULL; + if (n < znode->child_cnt) { + /* Now go down the leftmost branch to 'level' */ + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + while (znode->level != level) { + znode = get_znode(c, znode, 0); + if (IS_ERR(znode)) + return znode; + } + break; + } + } + return znode; +} + +/** + * lookup_znode - find a particular indexing node from TNC. + * @c: UBIFS file-system description object + * @key: index node key to lookup + * @level: index node level + * @lnum: index node LEB number + * @offs: index node offset + * + * This function searches an indexing node by its first key @key and its + * address @lnum:@offs. It looks up the indexing tree by pulling all indexing + * nodes it traverses to TNC. This function is called for indexing nodes which + * were found on the media by scanning, for example when garbage-collecting or + * when doing in-the-gaps commit. This means that the indexing node which is + * looked for does not have to have exactly the same leftmost key @key, because + * the leftmost key may have been changed, in which case TNC will contain a + * dirty znode which still refers the same @lnum:@offs. This function is clever + * enough to recognize such indexing nodes. + * + * Note, if a znode was deleted or changed too much, then this function will + * not find it. For situations like this UBIFS has the old index RB-tree + * (indexed by @lnum:@offs). + * + * This function returns a pointer to the znode found or %NULL if it is not + * found. A negative error code is returned on failure. + */ +static struct ubifs_znode *lookup_znode(struct ubifs_info *c, + union ubifs_key *key, int level, + int lnum, int offs) +{ + struct ubifs_znode *znode, *zn; + int n, nn; + + ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); + + /* + * The arguments have probably been read off flash, so don't assume + * they are valid. + */ + if (level < 0) + return ERR_PTR(-EINVAL); + + /* Get the root znode */ + znode = c->zroot.znode; + if (!znode) { + znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(znode)) + return znode; + } + /* Check if it is the one we are looking for */ + if (c->zroot.lnum == lnum && c->zroot.offs == offs) + return znode; + /* Descend to the parent level i.e. (level + 1) */ + if (level >= znode->level) + return NULL; + while (1) { + ubifs_search_zbranch(c, znode, key, &n); + if (n < 0) { + /* + * We reached a znode where the leftmost key is greater + * than the key we are searching for. This is the same + * situation as the one described in a huge comment at + * the end of the 'ubifs_lookup_level0()' function. And + * for exactly the same reasons we have to try to look + * left before giving up. + */ + znode = left_znode(c, znode); + if (!znode) + return NULL; + if (IS_ERR(znode)) + return znode; + ubifs_search_zbranch(c, znode, key, &n); + ubifs_assert(n >= 0); + } + if (znode->level == level + 1) + break; + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + } + /* Check if the child is the one we are looking for */ + if (znode->zbranch[n].lnum == lnum && znode->zbranch[n].offs == offs) + return get_znode(c, znode, n); + /* If the key is unique, there is nowhere else to look */ + if (!is_hash_key(c, key)) + return NULL; + /* + * The key is not unique and so may be also in the znodes to either + * side. + */ + zn = znode; + nn = n; + /* Look left */ + while (1) { + /* Move one branch to the left */ + if (n) + n -= 1; + else { + znode = left_znode(c, znode); + if (!znode) + break; + if (IS_ERR(znode)) + return znode; + n = znode->child_cnt - 1; + } + /* Check it */ + if (znode->zbranch[n].lnum == lnum && + znode->zbranch[n].offs == offs) + return get_znode(c, znode, n); + /* Stop if the key is less than the one we are looking for */ + if (keys_cmp(c, &znode->zbranch[n].key, key) < 0) + break; + } + /* Back to the middle */ + znode = zn; + n = nn; + /* Look right */ + while (1) { + /* Move one branch to the right */ + if (++n >= znode->child_cnt) { + znode = right_znode(c, znode); + if (!znode) + break; + if (IS_ERR(znode)) + return znode; + n = 0; + } + /* Check it */ + if (znode->zbranch[n].lnum == lnum && + znode->zbranch[n].offs == offs) + return get_znode(c, znode, n); + /* Stop if the key is greater than the one we are looking for */ + if (keys_cmp(c, &znode->zbranch[n].key, key) > 0) + break; + } + return NULL; +} + +/** + * is_idx_node_in_tnc - determine if an index node is in the TNC. + * @c: UBIFS file-system description object + * @key: key of index node + * @level: index node level + * @lnum: LEB number of index node + * @offs: offset of index node + * + * This function returns %0 if the index node is not referred to in the TNC, %1 + * if the index node is referred to in the TNC and the corresponding znode is + * dirty, %2 if an index node is referred to in the TNC and the corresponding + * znode is clean, and a negative error code in case of failure. + * + * Note, the @key argument has to be the key of the first child. Also note, + * this function relies on the fact that 0:0 is never a valid LEB number and + * offset for a main-area node. + */ +int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs) +{ + struct ubifs_znode *znode; + + znode = lookup_znode(c, key, level, lnum, offs); + if (!znode) + return 0; + if (IS_ERR(znode)) + return PTR_ERR(znode); + + return ubifs_zn_dirty(znode) ? 1 : 2; +} + +/** + * is_leaf_node_in_tnc - determine if a non-indexing not is in the TNC. + * @c: UBIFS file-system description object + * @key: node key + * @lnum: node LEB number + * @offs: node offset + * + * This function returns %1 if the node is referred to in the TNC, %0 if it is + * not, and a negative error code in case of failure. + * + * Note, this function relies on the fact that 0:0 is never a valid LEB number + * and offset for a main-area node. + */ +static int is_leaf_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, + int lnum, int offs) +{ + struct ubifs_zbranch *zbr; + struct ubifs_znode *znode, *zn; + int n, found, err, nn; + const int unique = !is_hash_key(c, key); + + found = ubifs_lookup_level0(c, key, &znode, &n); + if (found < 0) + return found; /* Error code */ + if (!found) + return 0; + zbr = &znode->zbranch[n]; + if (lnum == zbr->lnum && offs == zbr->offs) + return 1; /* Found it */ + if (unique) + return 0; + /* + * Because the key is not unique, we have to look left + * and right as well + */ + zn = znode; + nn = n; + /* Look left */ + while (1) { + err = tnc_prev(c, &znode, &n); + if (err == -ENOENT) + break; + if (err) + return err; + if (keys_cmp(c, key, &znode->zbranch[n].key)) + break; + zbr = &znode->zbranch[n]; + if (lnum == zbr->lnum && offs == zbr->offs) + return 1; /* Found it */ + } + /* Look right */ + znode = zn; + n = nn; + while (1) { + err = tnc_next(c, &znode, &n); + if (err) { + if (err == -ENOENT) + return 0; + return err; + } + if (keys_cmp(c, key, &znode->zbranch[n].key)) + break; + zbr = &znode->zbranch[n]; + if (lnum == zbr->lnum && offs == zbr->offs) + return 1; /* Found it */ + } + return 0; +} + +/** + * ubifs_tnc_has_node - determine whether a node is in the TNC. + * @c: UBIFS file-system description object + * @key: node key + * @level: index node level (if it is an index node) + * @lnum: node LEB number + * @offs: node offset + * @is_idx: non-zero if the node is an index node + * + * This function returns %1 if the node is in the TNC, %0 if it is not, and a + * negative error code in case of failure. For index nodes, @key has to be the + * key of the first child. An index node is considered to be in the TNC only if + * the corresponding znode is clean or has not been loaded. + */ +int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs, int is_idx) +{ + int err; + + mutex_lock(&c->tnc_mutex); + if (is_idx) { + err = is_idx_node_in_tnc(c, key, level, lnum, offs); + if (err < 0) + goto out_unlock; + if (err == 1) + /* The index node was found but it was dirty */ + err = 0; + else if (err == 2) + /* The index node was found and it was clean */ + err = 1; + else + BUG_ON(err != 0); + } else + err = is_leaf_node_in_tnc(c, key, lnum, offs); + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_dirty_idx_node - dirty an index node. + * @c: UBIFS file-system description object + * @key: index node key + * @level: index node level + * @lnum: index node LEB number + * @offs: index node offset + * + * This function loads and dirties an index node so that it can be garbage + * collected. The @key argument has to be the key of the first child. This + * function relies on the fact that 0:0 is never a valid LEB number and offset + * for a main-area node. Returns %0 on success and a negative error code on + * failure. + */ +int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs) +{ + struct ubifs_znode *znode; + int err = 0; + + mutex_lock(&c->tnc_mutex); + znode = lookup_znode(c, key, level, lnum, offs); + if (!znode) + goto out_unlock; + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * dbg_check_inode_size - check if inode size is correct. + * @c: UBIFS file-system description object + * @inum: inode number + * @size: inode size + * + * This function makes sure that the inode size (@size) is correct and it does + * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL + * if it has a data page beyond @size, and other negative error code in case of + * other errors. + */ +int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + loff_t size) +{ + int err, n; + union ubifs_key from_key, to_key, *key; + struct ubifs_znode *znode; + unsigned int block; + + if (!S_ISREG(inode->i_mode)) + return 0; + if (!dbg_is_chk_gen(c)) + return 0; + + block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; + data_key_init(c, &from_key, inode->i_ino, block); + highest_data_key(c, &to_key, inode->i_ino); + + mutex_lock(&c->tnc_mutex); + err = ubifs_lookup_level0(c, &from_key, &znode, &n); + if (err < 0) + goto out_unlock; + + if (err) { + err = -EINVAL; + key = &from_key; + goto out_dump; + } + + err = tnc_next(c, &znode, &n); + if (err == -ENOENT) { + err = 0; + goto out_unlock; + } + if (err < 0) + goto out_unlock; + + ubifs_assert(err == 0); + key = &znode->zbranch[n].key; + if (!key_in_range(c, key, &from_key, &to_key)) + goto out_unlock; + +out_dump: + block = key_block(c, key); + ubifs_err("inode %lu has size %lld, but there are data at offset %lld", + (unsigned long)inode->i_ino, size, + ((loff_t)block) << UBIFS_BLOCK_SHIFT); + mutex_unlock(&c->tnc_mutex); + ubifs_dump_inode(c, inode); + dump_stack(); + return -EINVAL; + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index 955219f..81bdad9 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) @@ -27,6 +16,10 @@ * putting it all in one file would make that file too big and unreadable. */ +#define __UBOOT__ +#ifdef __UBOOT__ +#include <linux/err.h> +#endif #include "ubifs.h" /** @@ -219,6 +212,44 @@ struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode) } /** + * ubifs_destroy_tnc_subtree - destroy all znodes connected to a subtree. + * @znode: znode defining subtree to destroy + * + * This function destroys subtree of the TNC tree. Returns number of clean + * znodes in the subtree. + */ +long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode) +{ + struct ubifs_znode *zn = ubifs_tnc_postorder_first(znode); + long clean_freed = 0; + int n; + + ubifs_assert(zn); + while (1) { + for (n = 0; n < zn->child_cnt; n++) { + if (!zn->zbranch[n].znode) + continue; + + if (zn->level > 0 && + !ubifs_zn_dirty(zn->zbranch[n].znode)) + clean_freed += 1; + + cond_resched(); + kfree(zn->zbranch[n].znode); + } + + if (zn == znode) { + if (!ubifs_zn_dirty(zn)) + clean_freed += 1; + kfree(zn); + return clean_freed; + } + + zn = ubifs_tnc_postorder_next(zn); + } +} + +/** * read_znode - read an indexing node from flash and fill znode. * @c: UBIFS file-system description object * @lnum: LEB of the indexing node to read @@ -255,10 +286,10 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, lnum, offs, znode->level, znode->child_cnt); if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { - dbg_err("current fanout %d, branch count %d", - c->fanout, znode->child_cnt); - dbg_err("max levels %d, znode level %d", - UBIFS_MAX_LEVELS, znode->level); + ubifs_err("current fanout %d, branch count %d", + c->fanout, znode->child_cnt); + ubifs_err("max levels %d, znode level %d", + UBIFS_MAX_LEVELS, znode->level); err = 1; goto out_dump; } @@ -278,7 +309,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, if (zbr->lnum < c->main_first || zbr->lnum >= c->leb_cnt || zbr->offs < 0 || zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { - dbg_err("bad branch %d", i); + ubifs_err("bad branch %d", i); err = 2; goto out_dump; } @@ -290,8 +321,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, case UBIFS_XENT_KEY: break; default: - dbg_msg("bad key type at slot %d: %s", i, - DBGKEY(&zbr->key)); + ubifs_err("bad key type at slot %d: %d", + i, key_type(c, &zbr->key)); err = 3; goto out_dump; } @@ -302,19 +333,19 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, type = key_type(c, &zbr->key); if (c->ranges[type].max_len == 0) { if (zbr->len != c->ranges[type].len) { - dbg_err("bad target node (type %d) length (%d)", - type, zbr->len); - dbg_err("have to be %d", c->ranges[type].len); + ubifs_err("bad target node (type %d) length (%d)", + type, zbr->len); + ubifs_err("have to be %d", c->ranges[type].len); err = 4; goto out_dump; } } else if (zbr->len < c->ranges[type].min_len || zbr->len > c->ranges[type].max_len) { - dbg_err("bad target node (type %d) length (%d)", - type, zbr->len); - dbg_err("have to be in range of %d-%d", - c->ranges[type].min_len, - c->ranges[type].max_len); + ubifs_err("bad target node (type %d) length (%d)", + type, zbr->len); + ubifs_err("have to be in range of %d-%d", + c->ranges[type].min_len, + c->ranges[type].max_len); err = 5; goto out_dump; } @@ -332,13 +363,13 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, cmp = keys_cmp(c, key1, key2); if (cmp > 0) { - dbg_err("bad key order (keys %d and %d)", i, i + 1); + ubifs_err("bad key order (keys %d and %d)", i, i + 1); err = 6; goto out_dump; } else if (cmp == 0 && !is_hash_key(c, key1)) { /* These can only be keys with colliding hash */ - dbg_err("keys %d and %d are not hashed but equivalent", - i, i + 1); + ubifs_err("keys %d and %d are not hashed but equivalent", + i, i + 1); err = 7; goto out_dump; } @@ -349,7 +380,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, out_dump: ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); - dbg_dump_node(c, idx); + ubifs_dump_node(c, idx); kfree(idx); return -EINVAL; } @@ -385,6 +416,16 @@ struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, if (err) goto out; + atomic_long_inc(&c->clean_zn_cnt); + + /* + * Increment the global clean znode counter as well. It is OK that + * global and per-FS clean znode counters may be inconsistent for some + * short time (because we might be preempted at this point), the global + * one is only used in shrinker. + */ + atomic_long_inc(&ubifs_clean_zn_cnt); + zbr->znode = znode; znode->parent = parent; znode->time = get_seconds(); @@ -412,11 +453,22 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, { union ubifs_key key1, *key = &zbr->key; int err, type = key_type(c, key); + struct ubifs_wbuf *wbuf; - err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum, zbr->offs); + /* + * 'zbr' has to point to on-flash node. The node may sit in a bud and + * may even be in a write buffer, so we have to take care about this. + */ + wbuf = ubifs_get_wbuf(c, zbr->lnum); + if (wbuf) + err = ubifs_read_node_wbuf(wbuf, node, type, zbr->len, + zbr->lnum, zbr->offs); + else + err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum, + zbr->offs); if (err) { - dbg_tnc("key %s", DBGKEY(key)); + dbg_tnck(key, "key "); return err; } @@ -425,9 +477,9 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, if (!keys_eq(c, key, &key1)) { ubifs_err("bad key in node at LEB %d:%d", zbr->lnum, zbr->offs); - dbg_tnc("looked for key %s found node's key %s", - DBGKEY(key), DBGKEY1(&key1)); - dbg_dump_node(c, node); + dbg_tnck(key, "looked for key "); + dbg_tnck(&key1, "but found node's key "); + ubifs_dump_node(c, node); return -EINVAL; } diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 3eee07e..90b8ffa 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -3,18 +3,7 @@ * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -135,6 +124,13 @@ /* The key is always at the same position in all keyed nodes */ #define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) +/* Garbage collector journal head number */ +#define UBIFS_GC_HEAD 0 +/* Base journal head number */ +#define UBIFS_BASE_HEAD 1 +/* Data journal head number */ +#define UBIFS_DATA_HEAD 2 + /* * LEB Properties Tree node types. * @@ -401,9 +397,11 @@ enum { * Superblock flags. * * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set + * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed */ enum { UBIFS_FLG_BIGLPT = 0x02, + UBIFS_FLG_SPACE_FIXUP = 0x04, }; /** @@ -427,7 +425,7 @@ struct ubifs_ch { __u8 node_type; __u8 group_type; __u8 padding[2]; -} __attribute__ ((packed)); +} __packed; /** * union ubifs_dev_desc - device node descriptor. @@ -441,7 +439,7 @@ struct ubifs_ch { union ubifs_dev_desc { __le32 new; __le64 huge; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_ino_node - inode node. @@ -502,7 +500,7 @@ struct ubifs_ino_node { __le16 compr_type; __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ __u8 data[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_dent_node - directory entry node. @@ -526,8 +524,12 @@ struct ubifs_dent_node { __u8 type; __le16 nlen; __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ +#ifndef __UBOOT__ __u8 name[]; -} __attribute__ ((packed)); +#else + char name[]; +#endif +} __packed; /** * struct ubifs_data_node - data node. @@ -548,7 +550,7 @@ struct ubifs_data_node { __le16 compr_type; __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ __u8 data[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_trun_node - truncation node. @@ -568,7 +570,7 @@ struct ubifs_trun_node { __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ __le64 old_size; __le64 new_size; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_pad_node - padding node. @@ -579,7 +581,7 @@ struct ubifs_trun_node { struct ubifs_pad_node { struct ubifs_ch ch; __le32 pad_len; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_sb_node - superblock node. @@ -637,7 +639,7 @@ struct ubifs_sb_node { __u8 uuid[16]; __le32 ro_compat_version; __u8 padding2[3968]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_mst_node - master node. @@ -704,7 +706,7 @@ struct ubifs_mst_node { __le32 idx_lebs; __le32 leb_cnt; __u8 padding[344]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_ref_node - logical eraseblock reference node. @@ -720,7 +722,7 @@ struct ubifs_ref_node { __le32 offs; __le32 jhead; __u8 padding[28]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_branch - key/reference/length branch @@ -733,8 +735,12 @@ struct ubifs_branch { __le32 lnum; __le32 offs; __le32 len; +#ifndef __UBOOT__ __u8 key[]; -} __attribute__ ((packed)); +#else + char key[]; +#endif +} __packed; /** * struct ubifs_idx_node - indexing node. @@ -747,8 +753,12 @@ struct ubifs_idx_node { struct ubifs_ch ch; __le16 child_cnt; __le16 level; +#ifndef __UBOOT__ __u8 branches[]; -} __attribute__ ((packed)); +#else + char branches[]; +#endif +} __packed; /** * struct ubifs_cs_node - commit start node. @@ -758,7 +768,7 @@ struct ubifs_idx_node { struct ubifs_cs_node { struct ubifs_ch ch; __le64 cmt_no; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_orph_node - orphan node. @@ -770,6 +780,6 @@ struct ubifs_orph_node { struct ubifs_ch ch; __le64 cmt_no; __le64 inos[]; -} __attribute__ ((packed)); +} __packed; #endif /* __UBIFS_MEDIA_H__ */ diff --git a/fs/ubifs/ubifs.c b/fs/ubifs/ubifs.c index 273c0a9..b91a6fd 100644 --- a/fs/ubifs/ubifs.c +++ b/fs/ubifs/ubifs.c @@ -26,6 +26,10 @@ #include "ubifs.h" #include <u-boot/zlib.h> +#define __UBOOT__ +#include <linux/err.h> +#include <linux/lzo.h> + DECLARE_GLOBAL_DATA_PTR; /* compress.c */ @@ -44,20 +48,27 @@ static int gzip_decompress(const unsigned char *in, size_t in_len, /* Fake description object for the "none" compressor */ static struct ubifs_compressor none_compr = { .compr_type = UBIFS_COMPR_NONE, - .name = "no compression", + .name = "none", .capi_name = "", .decompress = NULL, }; static struct ubifs_compressor lzo_compr = { .compr_type = UBIFS_COMPR_LZO, - .name = "LZO", +#ifndef __UBOOT__ + .comp_mutex = &lzo_mutex, +#endif + .name = "lzo", .capi_name = "lzo", .decompress = lzo1x_decompress_safe, }; static struct ubifs_compressor zlib_compr = { .compr_type = UBIFS_COMPR_ZLIB, +#ifndef __UBOOT__ + .comp_mutex = &deflate_mutex, + .decomp_mutex = &inflate_mutex, +#endif .name = "zlib", .capi_name = "deflate", .decompress = gzip_decompress, @@ -66,6 +77,82 @@ static struct ubifs_compressor zlib_compr = { /* All UBIFS compressors */ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; + +#ifdef __UBOOT__ +/* from mm/util.c */ + +/** + * kmemdup - duplicate region of memory + * + * @src: memory region to duplicate + * @len: memory region length + * @gfp: GFP mask to use + */ +void *kmemdup(const void *src, size_t len, gfp_t gfp) +{ + void *p; + + p = kmalloc(len, gfp); + if (p) + memcpy(p, src, len); + return p; +} + +struct crypto_comp { + int compressor; +}; + +static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name, + u32 type, u32 mask) +{ + struct ubifs_compressor *comp; + struct crypto_comp *ptr; + int i = 0; + + ptr = malloc(sizeof(struct crypto_comp)); + while (i < UBIFS_COMPR_TYPES_CNT) { + comp = ubifs_compressors[i]; + if (!comp) { + i++; + continue; + } + if (strncmp(alg_name, comp->capi_name, strlen(alg_name)) == 0) { + ptr->compressor = i; + return ptr; + } + i++; + } + if (i >= UBIFS_COMPR_TYPES_CNT) { + ubifs_err("invalid compression type %s", alg_name); + free (ptr); + return NULL; + } + return ptr; +} +static inline int crypto_comp_decompress(struct crypto_comp *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) +{ + struct ubifs_compressor *compr = ubifs_compressors[tfm->compressor]; + int err; + + if (compr->compr_type == UBIFS_COMPR_NONE) { + memcpy(dst, src, slen); + *dlen = slen; + return 0; + } + + err = compr->decompress(src, slen, dst, (size_t *)dlen); + if (err) + ubifs_err("cannot decompress %d bytes, compressor %s, " + "error %d", slen, compr->name, err); + + return err; + + return 0; +} +#endif + /** * ubifs_decompress - decompress data. * @in_buf: data to decompress @@ -102,10 +189,15 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, return 0; } - err = compr->decompress(in_buf, in_len, out_buf, (size_t *)out_len); + if (compr->decomp_mutex) + mutex_lock(compr->decomp_mutex); + err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf, + (unsigned int *)out_len); + if (compr->decomp_mutex) + mutex_unlock(compr->decomp_mutex); if (err) - ubifs_err("cannot decompress %d bytes, compressor %s, " - "error %d", in_len, compr->name, err); + ubifs_err("cannot decompress %d bytes, compressor %s, error %d", + in_len, compr->name, err); return err; } @@ -127,6 +219,15 @@ static int __init compr_init(struct ubifs_compressor *compr) ubifs_compressors[compr->compr_type]->decompress += gd->reloc_off; #endif + if (compr->capi_name) { + compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0); + if (IS_ERR(compr->cc)) { + ubifs_err("cannot initialize compressor %s, error %ld", + compr->name, PTR_ERR(compr->cc)); + return PTR_ERR(compr->cc); + } + } + return 0; } @@ -188,7 +289,9 @@ static int filldir(struct ubifs_info *c, const char *name, int namlen, } ctime_r((time_t *)&inode->i_mtime, filetime); printf("%9lld %24.24s ", inode->i_size, filetime); +#ifndef __UBOOT__ ubifs_iput(inode); +#endif printf("%s\n", name); @@ -562,7 +665,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, dump: ubifs_err("bad data node (block %u, inode %lu)", block, inode->i_ino); - dbg_dump_node(c, dn); + ubifs_dump_node(c, dn); return -EINVAL; } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 2213201..acc6a40 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -6,18 +6,7 @@ * (C) Copyright 2008-2009 * Stefan Roese, DENX Software Engineering, sr@denx.de. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * SPDX-License-Identifier: GPL-2.0+ * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -26,12 +15,25 @@ #ifndef __UBIFS_H__ #define __UBIFS_H__ -#if 0 /* Enable for debugging output */ -#define CONFIG_UBIFS_FS_DEBUG -#define CONFIG_UBIFS_FS_DEBUG_MSG_LVL 3 -#endif - +#define __UBOOT__ +#ifndef __UBOOT__ +#include <asm/div64.h> +#include <linux/statfs.h> +#include <linux/fs.h> +#include <linux/err.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> +#include <linux/mtd/ubi.h> +#include <linux/pagemap.h> +#include <linux/backing-dev.h> +#include "ubifs-media.h" +#else #include <ubi_uboot.h> + #include <linux/ctype.h> #include <linux/time.h> #include <linux/math64.h> @@ -70,13 +72,26 @@ void iput(struct inode *inode); #define atomic_long_dec(a) #define atomic_long_sub(a, b) +typedef unsigned long atomic_long_t; + /* linux/include/time.h */ +#define NSEC_PER_SEC 1000000000L +#define get_seconds() 0 +#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) struct timespec { time_t tv_sec; /* seconds */ long tv_nsec; /* nanoseconds */ }; +static struct timespec current_fs_time(struct super_block *sb) +{ + struct timespec now; + now.tv_sec = 0; + now.tv_nsec = 0; + return now; +}; + /* linux/include/dcache.h */ /* @@ -89,111 +104,245 @@ struct timespec { struct qstr { unsigned int hash; unsigned int len; +#ifndef __UBOOT__ const char *name; +#else + char *name; +#endif +}; + +/* include/linux/fs.h */ + +/* Possible states of 'frozen' field */ +enum { + SB_UNFROZEN = 0, /* FS is unfrozen */ + SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */ + SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */ + SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop + * internal threads if needed) */ + SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */ }; +#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) + +struct sb_writers { +#ifndef __UBOOT__ + /* Counters for counting writers at each level */ + struct percpu_counter counter[SB_FREEZE_LEVELS]; +#endif + wait_queue_head_t wait; /* queue for waiting for + writers / faults to finish */ + int frozen; /* Is sb frozen? */ + wait_queue_head_t wait_unfrozen; /* queue for waiting for + sb to be thawed */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map lock_map[SB_FREEZE_LEVELS]; +#endif +}; + +struct address_space { + struct inode *host; /* owner: inode, block_device */ +#ifndef __UBOOT__ + struct radix_tree_root page_tree; /* radix tree of all pages */ +#endif + spinlock_t tree_lock; /* and lock protecting it */ + unsigned int i_mmap_writable;/* count VM_SHARED mappings */ + struct rb_root i_mmap; /* tree of private and shared mappings */ + struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ + struct mutex i_mmap_mutex; /* protect tree, count, list */ + /* Protected by tree_lock together with the radix tree */ + unsigned long nrpages; /* number of total pages */ + pgoff_t writeback_index;/* writeback starts here */ + const struct address_space_operations *a_ops; /* methods */ + unsigned long flags; /* error bits/gfp mask */ +#ifndef __UBOOT__ + struct backing_dev_info *backing_dev_info; /* device readahead, etc */ +#endif + spinlock_t private_lock; /* for use by the address_space */ + struct list_head private_list; /* ditto */ + void *private_data; /* ditto */ +} __attribute__((aligned(sizeof(long)))); + +/* + * Keep mostly read-only and often accessed (especially for + * the RCU path lookup and 'stat' data) fields at the beginning + * of the 'struct inode' + */ struct inode { - struct hlist_node i_hash; - struct list_head i_list; - struct list_head i_sb_list; - struct list_head i_dentry; + umode_t i_mode; + unsigned short i_opflags; + kuid_t i_uid; + kgid_t i_gid; + unsigned int i_flags; + +#ifdef CONFIG_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + + const struct inode_operations *i_op; + struct super_block *i_sb; + struct address_space *i_mapping; + +#ifdef CONFIG_SECURITY + void *i_security; +#endif + + /* Stat data, not accessed from path walking */ unsigned long i_ino; - unsigned int i_nlink; - uid_t i_uid; - gid_t i_gid; + /* + * Filesystems may only read i_nlink directly. They shall use the + * following functions for modification: + * + * (set|clear|inc|drop)_nlink + * inode_(inc|dec)_link_count + */ + union { + const unsigned int i_nlink; + unsigned int __i_nlink; + }; dev_t i_rdev; - u64 i_version; loff_t i_size; -#ifdef __NEED_I_SIZE_ORDERED - seqcount_t i_size_seqcount; -#endif struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; - unsigned int i_blkbits; - unsigned short i_bytes; - umode_t i_mode; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ + unsigned short i_bytes; + unsigned int i_blkbits; + blkcnt_t i_blocks; + +#ifdef __NEED_I_SIZE_ORDERED + seqcount_t i_size_seqcount; +#endif + + /* Misc */ + unsigned long i_state; struct mutex i_mutex; - struct rw_semaphore i_alloc_sem; - const struct inode_operations *i_op; + + unsigned long dirtied_when; /* jiffies of first dirtying */ + + struct hlist_node i_hash; + struct list_head i_wb_list; /* backing dev IO list */ + struct list_head i_lru; /* inode LRU list */ + struct list_head i_sb_list; + union { + struct hlist_head i_dentry; + struct rcu_head i_rcu; + }; + u64 i_version; + atomic_t i_count; + atomic_t i_dio_count; + atomic_t i_writecount; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ - struct super_block *i_sb; struct file_lock *i_flock; + struct address_space i_data; #ifdef CONFIG_QUOTA struct dquot *i_dquot[MAXQUOTAS]; #endif struct list_head i_devices; - int i_cindex; + union { + struct pipe_inode_info *i_pipe; + struct block_device *i_bdev; + struct cdev *i_cdev; + }; __u32 i_generation; -#ifdef CONFIG_DNOTIFY - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ +#ifdef CONFIG_FSNOTIFY + __u32 i_fsnotify_mask; /* all events this inode cares about */ + struct hlist_head i_fsnotify_marks; #endif -#ifdef CONFIG_INOTIFY - struct list_head inotify_watches; /* watches on this inode */ - struct mutex inotify_mutex; /* protects the watches list */ +#ifdef CONFIG_IMA + atomic_t i_readcount; /* struct files open RO */ #endif + void *i_private; /* fs or device private pointer */ +}; - unsigned long i_state; - unsigned long dirtied_when; /* jiffies of first dirtying */ - - unsigned int i_flags; - -#ifdef CONFIG_SECURITY - void *i_security; +struct super_operations { + struct inode *(*alloc_inode)(struct super_block *sb); + void (*destroy_inode)(struct inode *); + + void (*dirty_inode) (struct inode *, int flags); + int (*write_inode) (struct inode *, struct writeback_control *wbc); + int (*drop_inode) (struct inode *); + void (*evict_inode) (struct inode *); + void (*put_super) (struct super_block *); + int (*sync_fs)(struct super_block *sb, int wait); + int (*freeze_fs) (struct super_block *); + int (*unfreeze_fs) (struct super_block *); +#ifndef __UBOOT__ + int (*statfs) (struct dentry *, struct kstatfs *); #endif - void *i_private; /* fs or device private pointer */ + int (*remount_fs) (struct super_block *, int *, char *); + void (*umount_begin) (struct super_block *); + +#ifndef __UBOOT__ + int (*show_options)(struct seq_file *, struct dentry *); + int (*show_devname)(struct seq_file *, struct dentry *); + int (*show_path)(struct seq_file *, struct dentry *); + int (*show_stats)(struct seq_file *, struct dentry *); +#endif +#ifdef CONFIG_QUOTA + ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); + ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); +#endif + int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); + long (*nr_cached_objects)(struct super_block *, int); + long (*free_cached_objects)(struct super_block *, long, int); }; struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ - unsigned long s_blocksize; unsigned char s_blocksize_bits; - unsigned char s_dirt; - unsigned long long s_maxbytes; /* Max file size */ + unsigned long s_blocksize; + loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; - struct dquot_operations *dq_op; - struct quotactl_ops *s_qcop; + const struct dquot_operations *dq_op; + const struct quotactl_ops *s_qcop; const struct export_operations *s_export_op; unsigned long s_flags; unsigned long s_magic; struct dentry *s_root; struct rw_semaphore s_umount; - struct mutex s_lock; int s_count; - int s_syncing; - int s_need_sync_fs; + atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; #endif - struct xattr_handler **s_xattr; + const struct xattr_handler **s_xattr; struct list_head s_inodes; /* all inodes */ - struct list_head s_dirty; /* dirty inodes */ - struct list_head s_io; /* parked for writeback */ - struct list_head s_more_io; /* parked for more writeback */ - struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ - struct list_head s_files; - /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ - struct list_head s_dentry_lru; /* unused dentry lru */ - int s_nr_dentry_unused; /* # of dentry on lru */ - +#ifndef __UBOOT__ + struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ +#endif + struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; +#ifndef __UBOOT__ + struct backing_dev_info *s_bdi; +#endif struct mtd_info *s_mtd; - struct list_head s_instances; + struct hlist_node s_instances; +#ifndef __UBOOT__ + struct quota_info s_dquot; /* Diskquota specific options */ +#endif - int s_frozen; - wait_queue_head_t s_wait_unfrozen; + struct sb_writers s_writers; char s_id[32]; /* Informational name */ + u8 s_uuid[16]; /* UUID */ void *s_fs_info; /* Filesystem private info */ + unsigned int s_max_links; +#ifndef __UBOOT__ + fmode_t s_mode; +#endif + + /* Granularity of c/m/atime in ns. + Cannot be worse than a second */ + u32 s_time_gran; /* * The next field is for VFS *only*. No filesystems have any business @@ -201,66 +350,83 @@ struct super_block { */ struct mutex s_vfs_rename_mutex; /* Kludge */ - /* Granularity of c/m/atime in ns. - Cannot be worse than a second */ - u32 s_time_gran; - /* * Filesystem subtype. If non-empty the filesystem type field * in /proc/mounts will be "type.subtype" */ char *s_subtype; +#ifndef __UBOOT__ /* * Saved mount options for lazy filesystems using * generic_show_options() */ - char *s_options; + char __rcu *s_options; +#endif + const struct dentry_operations *s_d_op; /* default d_op for dentries */ + + /* + * Saved pool identifier for cleancache (-1 means none) + */ + int cleancache_poolid; + +#ifndef __UBOOT__ + struct shrinker s_shrink; /* per-sb shrinker handle */ +#endif + + /* Number of inodes with nlink == 0 but still referenced */ + atomic_long_t s_remove_count; + + /* Being remounted read-only */ + int s_readonly_remount; + + /* AIO completions deferred from interrupt context */ + struct workqueue_struct *s_dio_done_wq; + +#ifndef __UBOOT__ + /* + * Keep the lru lists last in the structure so they always sit on their + * own individual cachelines. + */ + struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; + struct list_lru s_inode_lru ____cacheline_aligned_in_smp; +#endif + struct rcu_head rcu; }; struct file_system_type { const char *name; int fs_flags; - int (*get_sb) (struct file_system_type *, int, - const char *, void *, struct vfsmount *); +#define FS_REQUIRES_DEV 1 +#define FS_BINARY_MOUNTDATA 2 +#define FS_HAS_SUBTYPE 4 +#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ +#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ +#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ + struct dentry *(*mount) (struct file_system_type *, int, + const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; - struct list_head fs_supers; + struct hlist_head fs_supers; + +#ifndef __UBOOT__ + struct lock_class_key s_lock_key; + struct lock_class_key s_umount_key; + struct lock_class_key s_vfs_rename_key; + struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; + + struct lock_class_key i_lock_key; + struct lock_class_key i_mutex_key; + struct lock_class_key i_mutex_dir_key; +#endif }; +/* include/linux/mount.h */ struct vfsmount { - struct list_head mnt_hash; - struct vfsmount *mnt_parent; /* fs we are mounted on */ - struct dentry *mnt_mountpoint; /* dentry of mountpoint */ struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ - struct list_head mnt_mounts; /* list of children, anchored here */ - struct list_head mnt_child; /* and going through their mnt_child */ int mnt_flags; - /* 4 bytes hole on 64bits arches */ - const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ - struct list_head mnt_list; - struct list_head mnt_expire; /* link in fs-specific expiry list */ - struct list_head mnt_share; /* circular list of shared mounts */ - struct list_head mnt_slave_list;/* list of slave mounts */ - struct list_head mnt_slave; /* slave list entry */ - struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ - struct mnt_namespace *mnt_ns; /* containing namespace */ - int mnt_id; /* mount identifier */ - int mnt_group_id; /* peer group identifier */ - /* - * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount - * to let these frequently modified fields in a separate cache line - * (so that reads of mnt_flags wont ping-pong on SMP machines) - */ - int mnt_expiry_mark; /* true if marked for expiry */ - int mnt_pinned; - int mnt_ghosts; - /* - * This value is not stable unless all of the mnt_writers[] spinlocks - * are held, and all mnt_writer[]s on this mount have 0 as their ->count - */ }; struct path { @@ -451,32 +617,35 @@ static inline ino_t parent_ino(struct dentry *dentry) /* debug.c */ -#define DEFINE_SPINLOCK(...) #define module_param_named(...) /* misc.h */ #define mutex_lock_nested(...) #define mutex_unlock_nested(...) #define mutex_is_locked(...) 0 +#endif /* Version of this UBIFS implementation */ #define UBIFS_VERSION 1 /* Normal UBIFS messages */ -#ifdef CONFIG_UBIFS_SILENCE_MSG -#define ubifs_msg(fmt, ...) -#else -#define ubifs_msg(fmt, ...) \ - printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) -#endif +#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__) /* UBIFS error messages */ -#define ubifs_err(fmt, ...) \ - printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", 0, \ +#ifndef __UBOOT__ +#define ubifs_err(fmt, ...) \ + pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ __func__, ##__VA_ARGS__) /* UBIFS warning messages */ -#define ubifs_warn(fmt, ...) \ - printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ - 0, __func__, ##__VA_ARGS__) +#define ubifs_warn(fmt, ...) \ + pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__) +#else +#define ubifs_err(fmt, ...) \ + pr_err("UBIFS error: %s: " fmt "\n", __func__, ##__VA_ARGS__) +/* UBIFS warning messages */ +#define ubifs_warn(fmt, ...) \ + pr_warn("UBIFS warning: %s: " fmt "\n", __func__, ##__VA_ARGS__) +#endif /* UBIFS file system VFS magic number */ #define UBIFS_SUPER_MAGIC 0x24051905 @@ -509,9 +678,6 @@ static inline ino_t parent_ino(struct dentry *dentry) #define INUM_WARN_WATERMARK 0xFFF00000 #define INUM_WATERMARK 0xFFFFFF00 -/* Largest key size supported in this implementation */ -#define CUR_MAX_KEY_LEN UBIFS_SK_LEN - /* Maximum number of entries in each LPT (LEB category) heap */ #define LPT_HEAP_SZ 256 @@ -521,8 +687,9 @@ static inline ino_t parent_ino(struct dentry *dentry) */ #define BGT_NAME_PATTERN "ubifs_bgt%d_%d" -/* Default write-buffer synchronization timeout (5 secs) */ -#define DEFAULT_WBUF_TIMEOUT (5 * HZ) +/* Write-buffer synchronization timeout interval in seconds */ +#define WBUF_TIMEOUT_SOFTLIMIT 3 +#define WBUF_TIMEOUT_HARDLIMIT 5 /* Maximum possible inode number (only 32-bit inodes are supported now) */ #define MAX_INUM 0xFFFFFFFF @@ -530,12 +697,10 @@ static inline ino_t parent_ino(struct dentry *dentry) /* Number of non-data journal heads */ #define NONDATA_JHEADS_CNT 2 -/* Garbage collector head */ -#define GCHD 0 -/* Base journal head number */ -#define BASEHD 1 -/* First "general purpose" journal head */ -#define DATAHD 2 +/* Shorter names for journal head numbers for internal usage */ +#define GCHD UBIFS_GC_HEAD +#define BASEHD UBIFS_BASE_HEAD +#define DATAHD UBIFS_DATA_HEAD /* 'No change' value for 'ubifs_change_lp()' */ #define LPROPS_NC 0x80000001 @@ -545,8 +710,12 @@ static inline ino_t parent_ino(struct dentry *dentry) * in TNC. However, when replaying, it is handy to introduce fake "truncation" * keys for truncation nodes because the code becomes simpler. So we define * %UBIFS_TRUN_KEY type. + * + * But otherwise, out of the journal reply scope, the truncation keys are + * invalid. */ -#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT +#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT +#define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT /* * How much a directory entry/extended attribute entry adds to the parent/host @@ -573,6 +742,12 @@ static inline ino_t parent_ino(struct dentry *dentry) */ #define WORST_COMPR_FACTOR 2 +/* + * How much memory is needed for a buffer where we comress a data node. + */ +#define COMPRESSED_DATA_NODE_BUF_SZ \ + (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) + /* Maximum expected tree height for use by bottom_up_buf */ #define BOTTOM_UP_HEIGHT 64 @@ -646,14 +821,14 @@ enum { * LPT cnode flag bits. * * DIRTY_CNODE: cnode is dirty - * COW_CNODE: cnode is being committed and must be copied before writing * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), - * so it can (and must) be freed when the commit is finished + * so it can (and must) be freed when the commit is finished + * COW_CNODE: cnode is being committed and must be copied before writing */ enum { DIRTY_CNODE = 0, - COW_CNODE = 1, - OBSOLETE_CNODE = 2, + OBSOLETE_CNODE = 1, + COW_CNODE = 2, }; /* @@ -693,10 +868,10 @@ struct ubifs_old_idx { /* The below union makes it easier to deal with keys */ union ubifs_key { - uint8_t u8[CUR_MAX_KEY_LEN]; - uint32_t u32[CUR_MAX_KEY_LEN/4]; - uint64_t u64[CUR_MAX_KEY_LEN/8]; - __le32 j32[CUR_MAX_KEY_LEN/4]; + uint8_t u8[UBIFS_SK_LEN]; + uint32_t u32[UBIFS_SK_LEN/4]; + uint64_t u64[UBIFS_SK_LEN/8]; + __le32 j32[UBIFS_SK_LEN/4]; }; /** @@ -805,9 +980,9 @@ struct ubifs_gced_idx_leb { * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot * make sure @inode->i_size is always changed under @ui_mutex, because it - * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock - * with 'ubifs_writepage()' (see file.c). All the other inode fields are - * changed under @ui_mutex, so they do not need "shadow" fields. Note, one + * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would + * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields + * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one * could consider to rework locking and base it on "shadow" fields. */ struct ubifs_inode { @@ -1068,17 +1243,19 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, * @offs: write-buffer offset in this logical eraseblock * @avail: number of bytes available in the write-buffer * @used: number of used bytes in the write-buffer - * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, - * %UBI_UNKNOWN) + * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range) * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep * up by 'mutex_lock_nested()). * @sync_callback: write-buffer synchronization callback * @io_mutex: serializes write-buffer I/O * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes * fields + * @softlimit: soft write-buffer timeout interval + * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit + * and @softlimit + @delta) * @timer: write-buffer timer - * @timeout: timer expire interval in jiffies - * @need_sync: it is set if its timer expired and needs sync + * @no_timer: non-zero if this write-buffer does not have a timer + * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing * @next_ino: points to the next position of the following inode number * @inodes: stores the inode numbers of the nodes which are in wbuf * @@ -1099,13 +1276,16 @@ struct ubifs_wbuf { int offs; int avail; int used; - int dtype; + int size; int jhead; int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); struct mutex io_mutex; spinlock_t lock; - int timeout; - int need_sync; +// ktime_t softlimit; +// unsigned long long delta; +// struct hrtimer timer; + unsigned int no_timer:1; + unsigned int need_sync:1; int next_ino; ino_t *inodes; }; @@ -1130,12 +1310,14 @@ struct ubifs_bud { * struct ubifs_jhead - journal head. * @wbuf: head's write-buffer * @buds_list: list of bud LEBs belonging to this journal head + * @grouped: non-zero if UBIFS groups nodes when writing to this journal head * * Note, the @buds list is protected by the @c->buds_lock. */ struct ubifs_jhead { struct ubifs_wbuf wbuf; struct list_head buds_list; + unsigned int grouped:1; }; /** @@ -1171,6 +1353,9 @@ struct ubifs_zbranch { * @offs: offset of the corresponding indexing node * @len: length of the corresponding indexing node * @zbranch: array of znode branches (@c->fanout elements) + * + * Note! The @lnum, @offs, and @len fields are not really needed - we have them + * only for internal consistency check. They could be removed to save some RAM. */ struct ubifs_znode { struct ubifs_znode *parent; @@ -1181,9 +1366,9 @@ struct ubifs_znode { int child_cnt; int iip; int alt; -#ifdef CONFIG_UBIFS_FS_DEBUG - int lnum, offs, len; -#endif + int lnum; + int offs; + int len; struct ubifs_zbranch zbranch[]; }; @@ -1236,10 +1421,15 @@ struct ubifs_node_range { */ struct ubifs_compressor { int compr_type; - char *name; - char *capi_name; + struct crypto_comp *cc; + struct mutex *comp_mutex; + struct mutex *decomp_mutex; + const char *name; + const char *capi_name; +#ifdef __UBOOT__ int (*decompress)(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len); +#endif }; /** @@ -1313,6 +1503,8 @@ struct ubifs_budget_req { * @dnext: next orphan to delete * @inum: inode number * @new: %1 => added since the last commit, otherwise %0 + * @cmt: %1 => commit pending, otherwise %0 + * @del: %1 => delete pending, otherwise %0 */ struct ubifs_orphan { struct rb_node rb; @@ -1321,7 +1513,9 @@ struct ubifs_orphan { struct ubifs_orphan *cnext; struct ubifs_orphan *dnext; ino_t inum; - int new; + unsigned new:1; + unsigned cmt:1; + unsigned del:1; }; /** @@ -1344,6 +1538,40 @@ struct ubifs_mount_opts { unsigned int compr_type:2; }; +/** + * struct ubifs_budg_info - UBIFS budgeting information. + * @idx_growth: amount of bytes budgeted for index growth + * @data_growth: amount of bytes budgeted for cached data + * @dd_growth: amount of bytes budgeted for cached data that will make + * other data dirty + * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but + * which still have to be taken into account because the index + * has not been committed so far + * @old_idx_sz: size of index on flash + * @min_idx_lebs: minimum number of LEBs required for the index + * @nospace: non-zero if the file-system does not have flash space (used as + * optimization) + * @nospace_rp: the same as @nospace, but additionally means that even reserved + * pool is full + * @page_budget: budget for a page (constant, nenver changed after mount) + * @inode_budget: budget for an inode (constant, nenver changed after mount) + * @dent_budget: budget for a directory entry (constant, nenver changed after + * mount) + */ +struct ubifs_budg_info { + long long idx_growth; + long long data_growth; + long long dd_growth; + long long uncommitted_idx; + unsigned long long old_idx_sz; + int min_idx_lebs; + unsigned int nospace:1; + unsigned int nospace_rp:1; + int page_budget; + int inode_budget; + int dent_budget; +}; + struct ubifs_debug_info; /** @@ -1387,6 +1615,7 @@ struct ubifs_debug_info; * @cmt_wq: wait queue to sleep on if the log is full and a commit is running * * @big_lpt: flag that LPT is too big to write whole during commit + * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up * @no_chk_data_crc: do not check CRCs when reading data nodes (except during * recovery) * @bulk_read: enable bulk-reads @@ -1418,6 +1647,11 @@ struct ubifs_debug_info; * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu * @bu: pre-allocated bulk-read information * + * @write_reserve_mutex: protects @write_reserve_buf + * @write_reserve_buf: on the write path we allocate memory, which might + * sometimes be unavailable, in which case we use this + * write reserve buffer + * * @log_lebs: number of logical eraseblocks in the log * @log_bytes: log size in bytes * @log_last: last LEB of the log @@ -1439,43 +1673,34 @@ struct ubifs_debug_info; * * @min_io_size: minimal input/output unit size * @min_io_shift: number of bits in @min_io_size minus one + * @max_write_size: maximum amount of bytes the underlying flash can write at a + * time (MTD write buffer size) + * @max_write_shift: number of bits in @max_write_size minus one * @leb_size: logical eraseblock size in bytes + * @leb_start: starting offset of logical eraseblocks within physical + * eraseblocks * @half_leb_size: half LEB size + * @idx_leb_size: how many bytes of an LEB are effectively available when it is + * used to store indexing nodes (@leb_size - @max_idx_node_sz) * @leb_cnt: count of logical eraseblocks * @max_leb_cnt: maximum count of logical eraseblocks * @old_leb_cnt: count of logical eraseblocks before re-size * @ro_media: the underlying UBI volume is read-only + * @ro_mount: the file-system was mounted as read-only + * @ro_error: UBIFS switched to R/O mode because an error happened * * @dirty_pg_cnt: number of dirty pages (not used) * @dirty_zn_cnt: number of dirty znodes * @clean_zn_cnt: number of clean znodes * - * @budg_idx_growth: amount of bytes budgeted for index growth - * @budg_data_growth: amount of bytes budgeted for cached data - * @budg_dd_growth: amount of bytes budgeted for cached data that will make - * other data dirty - * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, - * but which still have to be taken into account because - * the index has not been committed so far - * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, - * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, - * @nospace, and @nospace_rp; - * @min_idx_lebs: minimum number of LEBs required for the index - * @old_idx_sz: size of index on flash + * @space_lock: protects @bi and @lst + * @lst: lprops statistics + * @bi: budgeting information * @calc_idx_sz: temporary variable which is used to calculate new index size * (contains accurate new index size at end of TNC commit start) - * @lst: lprops statistics - * @nospace: non-zero if the file-system does not have flash space (used as - * optimization) - * @nospace_rp: the same as @nospace, but additionally means that even reserved - * pool is full - * - * @page_budget: budget for a page - * @inode_budget: budget for an inode - * @dent_budget: budget for a directory entry * * @ref_node_alsz: size of the LEB reference node aligned to the min. flash - * I/O unit + * I/O unit * @mst_node_alsz: master node aligned size * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary @@ -1558,9 +1783,11 @@ struct ubifs_debug_info; * previous commit start * @uncat_list: list of un-categorized LEBs * @empty_list: list of empty LEBs - * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) - * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) + * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) + * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) * @freeable_cnt: number of freeable LEBs in @freeable_list + * @in_a_category_cnt: count of lprops which are in a certain category, which + * basically meants that they were loaded from the flash * * @ltab_lnum: LEB number of LPT's own lprops table * @ltab_offs: offset of LPT's own lprops table @@ -1577,25 +1804,29 @@ struct ubifs_debug_info; * @rp_uid: reserved pool user ID * @rp_gid: reserved pool group ID * - * @empty: if the UBI device is empty - * @replay_tree: temporary tree used during journal replay + * @empty: %1 if the UBI device is empty + * @need_recovery: %1 if the file-system needs recovery + * @replaying: %1 during journal replay + * @mounting: %1 while mounting + * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode * @replay_list: temporary list used during journal replay * @replay_buds: list of buds to replay * @cs_sqnum: sequence number of first node in the log (commit start node) * @replay_sqnum: sequence number of node currently being replayed - * @need_recovery: file-system needs recovery - * @replaying: set to %1 during journal replay - * @unclean_leb_list: LEBs to recover when mounting ro to rw - * @rcvrd_mst_node: recovered master node to write when mounting ro to rw + * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W + * mode + * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted + * FS to R/W mode * @size_tree: inode size information for recovery - * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) - * @always_chk_crc: always check CRCs (while mounting and remounting rw) * @mount_opts: UBIFS-specific mount options * * @dbg: debugging-related information */ struct ubifs_info { struct super_block *vfs_sb; +#ifndef __UBOOT__ + struct backing_dev_info bdi; +#endif ino_t highest_inum; unsigned long long max_sqnum; @@ -1628,6 +1859,7 @@ struct ubifs_info { wait_queue_head_t cmt_wq; unsigned int big_lpt:1; + unsigned int space_fixup:1; unsigned int no_chk_data_crc:1; unsigned int bulk_read:1; unsigned int default_compr:2; @@ -1657,6 +1889,9 @@ struct ubifs_info { struct mutex bu_mutex; struct bu_info bu; + struct mutex write_reserve_mutex; + void *write_reserve_buf; + int log_lebs; long long log_bytes; int log_last; @@ -1678,28 +1913,27 @@ struct ubifs_info { int min_io_size; int min_io_shift; + int max_write_size; + int max_write_shift; int leb_size; + int leb_start; int half_leb_size; + int idx_leb_size; int leb_cnt; int max_leb_cnt; int old_leb_cnt; - int ro_media; + unsigned int ro_media:1; + unsigned int ro_mount:1; + unsigned int ro_error:1; + + atomic_long_t dirty_pg_cnt; + atomic_long_t dirty_zn_cnt; + atomic_long_t clean_zn_cnt; - long long budg_idx_growth; - long long budg_data_growth; - long long budg_dd_growth; - long long budg_uncommitted_idx; spinlock_t space_lock; - int min_idx_lebs; - unsigned long long old_idx_sz; - unsigned long long calc_idx_sz; struct ubifs_lp_stats lst; - unsigned int nospace:1; - unsigned int nospace_rp:1; - - int page_budget; - int inode_budget; - int dent_budget; + struct ubifs_budg_info bi; + unsigned long long calc_idx_sz; int ref_node_alsz; int mst_node_alsz; @@ -1785,6 +2019,7 @@ struct ubifs_info { struct list_head freeable_list; struct list_head frdi_idx_list; int freeable_cnt; + int in_a_category_cnt; int ltab_lnum; int ltab_offs; @@ -1798,31 +2033,32 @@ struct ubifs_info { long long rp_size; long long report_rp_size; - uid_t rp_uid; - gid_t rp_gid; + kuid_t rp_uid; + kgid_t rp_gid; /* The below fields are used only during mounting and re-mounting */ - int empty; - struct rb_root replay_tree; + unsigned int empty:1; + unsigned int need_recovery:1; + unsigned int replaying:1; + unsigned int mounting:1; + unsigned int remounting_rw:1; struct list_head replay_list; struct list_head replay_buds; unsigned long long cs_sqnum; unsigned long long replay_sqnum; - int need_recovery; - int replaying; struct list_head unclean_leb_list; struct ubifs_mst_node *rcvrd_mst_node; struct rb_root size_tree; - int remounting_rw; - int always_chk_crc; struct ubifs_mount_opts mount_opts; -#ifdef CONFIG_UBIFS_FS_DEBUG +#ifndef __UBOOT__ struct ubifs_debug_info *dbg; #endif }; +extern struct list_head ubifs_infos; extern spinlock_t ubifs_infos_lock; +extern atomic_long_t ubifs_clean_zn_cnt; extern struct kmem_cache *ubifs_inode_slab; extern const struct super_operations ubifs_super_operations; extern const struct address_space_operations ubifs_file_address_operations; @@ -1836,16 +2072,23 @@ extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ void ubifs_ro_mode(struct ubifs_info *c, int err); +int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, + int len, int even_ebadmsg); +int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len); +int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len); +int ubifs_leb_unmap(struct ubifs_info *c, int lnum); +int ubifs_leb_map(struct ubifs_info *c, int lnum); +int ubifs_is_mapped(const struct ubifs_info *c, int lnum); int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); -int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, - int dtype); +int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs); int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf); int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, int lnum, int offs); int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, int lnum, int offs); int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, - int offs, int dtype); + int offs); int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, int offs, int quiet, int must_chk_crc); void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); @@ -1859,7 +2102,7 @@ int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); /* scan.c */ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, - int offs, void *sbuf); + int offs, void *sbuf, int quiet); void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, int offs, int quiet); @@ -1921,7 +2164,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free); long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); /* find.c */ -int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, int squeeze); int ubifs_find_free_leb_for_idx(struct ubifs_info *c); int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, @@ -1983,8 +2226,13 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); +#ifndef __UBOOT__ /* shrinker.c */ -int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); +unsigned long ubifs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); +unsigned long ubifs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); +#endif /* commit.c */ int ubifs_bg_thread(void *info); @@ -2003,6 +2251,7 @@ int ubifs_write_master(struct ubifs_info *c); int ubifs_read_superblock(struct ubifs_info *c); struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); +int ubifs_fixup_free_space(struct ubifs_info *c); /* replay.c */ int ubifs_validate_entry(struct ubifs_info *c, @@ -2084,14 +2333,15 @@ const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); +int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ -int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); +int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, - int mode); + umode_t mode); int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); @@ -2111,11 +2361,11 @@ int ubifs_iput(struct inode *inode); int ubifs_recover_master_node(struct ubifs_info *c); int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped); + int offs, void *sbuf, int jhead); struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf); -int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); -int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); +int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf); int ubifs_rcvry_gc_commit(struct ubifs_info *c); int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, int deletion, loff_t new_size); @@ -2131,24 +2381,22 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* compressor.c */ int __init ubifs_compressors_init(void); -void __exit ubifs_compressors_exit(void); +void ubifs_compressors_exit(void); void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, int *compr_type); int ubifs_decompress(const void *buf, int len, void *out, int *out_len, int compr_type); +#include "debug.h" +#include "misc.h" +#include "key.h" + +#ifdef __UBOOT__ /* these are used in cmd_ubifs.c */ int ubifs_init(void); -int ubifs_mount(char *vol_name); +int uboot_ubifs_mount(char *vol_name); void ubifs_umount(struct ubifs_info *c); int ubifs_ls(char *dir_name); int ubifs_load(char *filename, u32 addr, u32 size); - -#include "debug.h" -#include "misc.h" -#include "key.h" - -/* todo: Move these to a common U-Boot header */ -int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, - unsigned char *out, size_t *out_len); +#endif #endif /* !__UBIFS_H__ */ diff --git a/include/linux/mtd/bbm.h b/include/linux/mtd/bbm.h index 25a3d3a..be81d38 100644 --- a/include/linux/mtd/bbm.h +++ b/include/linux/mtd/bbm.h @@ -4,13 +4,14 @@ * NAND family Bad Block Management (BBM) header file * - Bad Block Table (BBT) implementation * - * Copyright (c) 2005-2007 Samsung Electronics + * Copyright © 2005 Samsung Electronics * Kyungmin Park <kyungmin.park@samsung.com> * - * Copyright (c) 2000-2005 + * Copyright © 2000-2005 * Thomas Gleixner <tglx@linuxtronix.de> * * SPDX-License-Identifier: GPL-2.0+ + * */ #ifndef __LINUX_MTD_BBM_H #define __LINUX_MTD_BBM_H @@ -22,22 +23,21 @@ /** * struct nand_bbt_descr - bad block table descriptor - * @param options options for this descriptor - * @param pages the page(s) where we find the bbt, used with - * option BBT_ABSPAGE when bbt is searched, - * then we store the found bbts pages here. - * Its an array and supports up to 8 chips now - * @param offs offset of the pattern in the oob area of the page - * @param veroffs offset of the bbt version counter in the oob are of the page - * @param version version read from the bbt page during scan - * @param len length of the pattern, if 0 no pattern check is performed - * @param maxblocks maximum number of blocks to search for a bbt. This number of - * blocks is reserved at the end of the device - * where the tables are written. - * @param reserved_block_code if non-0, this pattern denotes a reserved - * (rather than bad) block in the stored bbt - * @param pattern pattern to identify bad block table or factory marked - * good / bad blocks, can be NULL, if len = 0 + * @options: options for this descriptor + * @pages: the page(s) where we find the bbt, used with option BBT_ABSPAGE + * when bbt is searched, then we store the found bbts pages here. + * Its an array and supports up to 8 chips now + * @offs: offset of the pattern in the oob area of the page + * @veroffs: offset of the bbt version counter in the oob are of the page + * @version: version read from the bbt page during scan + * @len: length of the pattern, if 0 no pattern check is performed + * @maxblocks: maximum number of blocks to search for a bbt. This number of + * blocks is reserved at the end of the device where the tables are + * written. + * @reserved_block_code: if non-0, this pattern denotes a reserved (rather than + * bad) block in the stored bbt + * @pattern: pattern to identify bad block table or factory marked good / + * bad blocks, can be NULL, if len = 0 * * Descriptor for the bad block table marker and the descriptor for the * pattern which identifies good and bad blocks. The assumption is made @@ -81,10 +81,6 @@ struct nand_bbt_descr { * with NAND_BBT_CREATE. */ #define NAND_BBT_CREATE_EMPTY 0x00000400 -/* Search good / bad pattern through all pages of a block */ -#define NAND_BBT_SCANALLPAGES 0x00000800 -/* Scan block empty during good / bad block scan */ -#define NAND_BBT_SCANEMPTY 0x00001000 /* Write bbt if neccecary */ #define NAND_BBT_WRITE 0x00002000 /* Read and write back block contents when writing bbt */ @@ -122,22 +118,27 @@ struct nand_bbt_descr { /* * Constants for oob configuration */ -#define ONENAND_BADBLOCK_POS 0 +#define NAND_SMALL_BADBLOCK_POS 5 +#define NAND_LARGE_BADBLOCK_POS 0 +#define ONENAND_BADBLOCK_POS 0 /* * Bad block scanning errors */ -#define ONENAND_BBT_READ_ERROR 1 -#define ONENAND_BBT_READ_ECC_ERROR 2 -#define ONENAND_BBT_READ_FATAL_ERROR 4 +#define ONENAND_BBT_READ_ERROR 1 +#define ONENAND_BBT_READ_ECC_ERROR 2 +#define ONENAND_BBT_READ_FATAL_ERROR 4 /** - * struct bbt_info - [GENERIC] Bad Block Table data structure - * @param bbt_erase_shift [INTERN] number of address bits in a bbt entry - * @param badblockpos [INTERN] position of the bad block marker in the oob area - * @param bbt [INTERN] bad block table pointer - * @param badblock_pattern [REPLACEABLE] bad block scan pattern used for initial bad block scan - * @param priv [OPTIONAL] pointer to private bbm date + * struct bbm_info - [GENERIC] Bad Block Table data structure + * @bbt_erase_shift: [INTERN] number of address bits in a bbt entry + * @badblockpos: [INTERN] position of the bad block marker in the oob area + * @options: options for this descriptor + * @bbt: [INTERN] bad block table pointer + * @isbad_bbt: function to determine if a block is bad + * @badblock_pattern: [REPLACEABLE] bad block scan pattern used for + * initial bad block scan + * @priv: [OPTIONAL] pointer to private bbm date */ struct bbm_info { int bbt_erase_shift; @@ -146,7 +147,7 @@ struct bbm_info { uint8_t *bbt; - int (*isbad_bbt) (struct mtd_info * mtd, loff_t ofs, int allowbbt); + int (*isbad_bbt)(struct mtd_info *mtd, loff_t ofs, int allowbbt); /* TODO Add more NAND specific fileds */ struct nand_bbt_descr *badblock_pattern; @@ -155,7 +156,7 @@ struct bbm_info { }; /* OneNAND BBT interface */ -extern int onenand_scan_bbt (struct mtd_info *mtd, struct nand_bbt_descr *bd); -extern int onenand_default_bbt (struct mtd_info *mtd); +extern int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd); +extern int onenand_default_bbt(struct mtd_info *mtd); -#endif /* __LINUX_MTD_BBM_H */ +#endif /* __LINUX_MTD_BBM_H */ diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h index c92b4dd..195a4a5 100644 --- a/include/linux/mtd/concat.h +++ b/include/linux/mtd/concat.h @@ -12,7 +12,11 @@ struct mtd_info *mtd_concat_create( struct mtd_info *subdev[], /* subdevices to concatenate */ int num_devs, /* number of subdevices */ +#ifndef __UBOOT__ const char *name); /* name for the new device */ +#else + char *name); /* name for the new device */ +#endif void mtd_concat_destroy(struct mtd_info *mtd); diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h new file mode 100644 index 0000000..7028ee1 --- /dev/null +++ b/include/linux/mtd/flashchip.h @@ -0,0 +1,105 @@ +/* + * Copyright © 2000 Red Hat UK Limited + * Copyright © 2000-2010 David Woodhouse <dwmw2@infradead.org> + * + * SPDX-License-Identifier: GPL-2.0+ + * + */ + +#ifndef __MTD_FLASHCHIP_H__ +#define __MTD_FLASHCHIP_H__ + +#define __UBOOT__ +#ifndef __UBOOT__ +/* For spinlocks. sched.h includes spinlock.h from whichever directory it + * happens to be in - so we don't have to care whether we're on 2.2, which + * has asm/spinlock.h, or 2.4, which has linux/spinlock.h + */ +#include <linux/sched.h> +#include <linux/mutex.h> +#endif + +typedef enum { + FL_READY, + FL_STATUS, + FL_CFI_QUERY, + FL_JEDEC_QUERY, + FL_ERASING, + FL_ERASE_SUSPENDING, + FL_ERASE_SUSPENDED, + FL_WRITING, + FL_WRITING_TO_BUFFER, + FL_OTP_WRITE, + FL_WRITE_SUSPENDING, + FL_WRITE_SUSPENDED, + FL_PM_SUSPENDED, + FL_SYNCING, + FL_UNLOADING, + FL_LOCKING, + FL_UNLOCKING, + FL_POINT, + FL_XIP_WHILE_ERASING, + FL_XIP_WHILE_WRITING, + FL_SHUTDOWN, + /* These 2 come from nand_state_t, which has been unified here */ + FL_READING, + FL_CACHEDPRG, + /* These 4 come from onenand_state_t, which has been unified here */ + FL_RESETING, + FL_OTPING, + FL_PREPARING_ERASE, + FL_VERIFYING_ERASE, + + FL_UNKNOWN +} flstate_t; + + + +/* NOTE: confusingly, this can be used to refer to more than one chip at a time, + if they're interleaved. This can even refer to individual partitions on + the same physical chip when present. */ + +struct flchip { + unsigned long start; /* Offset within the map */ + // unsigned long len; + /* We omit len for now, because when we group them together + we insist that they're all of the same size, and the chip size + is held in the next level up. If we get more versatile later, + it'll make it a damn sight harder to find which chip we want from + a given offset, and we'll want to add the per-chip length field + back in. + */ + int ref_point_counter; + flstate_t state; + flstate_t oldstate; + + unsigned int write_suspended:1; + unsigned int erase_suspended:1; + unsigned long in_progress_block_addr; + + struct mutex mutex; +#ifndef __UBOOT__ + wait_queue_head_t wq; /* Wait on here when we're waiting for the chip + to be ready */ +#endif + int word_write_time; + int buffer_write_time; + int erase_time; + + int word_write_time_max; + int buffer_write_time_max; + int erase_time_max; + + void *priv; +}; + +/* This is used to handle contention on write/erase operations + between partitions of the same physical chip. */ +struct flchip_shared { + struct mutex lock; + struct flchip *writing; + struct flchip *erasing; +}; + + +#endif /* __MTD_FLASHCHIP_H__ */ diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index a65b681..b7b4757 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -1,48 +1,45 @@ /* - * Copyright (C) 1999-2003 David Woodhouse <dwmw2@infradead.org> et al. + * Copyright © 1999-2010 David Woodhouse <dwmw2@infradead.org> et al. * * Released under GPL + * */ #ifndef __MTD_MTD_H__ #define __MTD_MTD_H__ +#define __UBOOT__ +#ifndef __UBOOT__ #include <linux/types.h> -#include <div64.h> +#include <linux/uio.h> +#include <linux/notifier.h> +#include <linux/device.h> + +#include <mtd/mtd-abi.h> + +#include <asm/div64.h> +#else +#include <linux/compat.h> #include <mtd/mtd-abi.h> #include <asm/errno.h> +#include <div64.h> -#define MTD_CHAR_MAJOR 90 -#define MTD_BLOCK_MAJOR 31 #define MAX_MTD_DEVICES 32 +#endif #define MTD_ERASE_PENDING 0x01 #define MTD_ERASING 0x02 #define MTD_ERASE_SUSPEND 0x04 -#define MTD_ERASE_DONE 0x08 -#define MTD_ERASE_FAILED 0x10 +#define MTD_ERASE_DONE 0x08 +#define MTD_ERASE_FAILED 0x10 -#define MTD_FAIL_ADDR_UNKNOWN -1LL +#define MTD_FAIL_ADDR_UNKNOWN -1LL /* - * Enumeration for NAND/OneNAND flash chip state + * If the erase fails, fail_addr might indicate exactly which block failed. If + * fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level + * or was not specific to any particular block. */ -enum { - FL_READY, - FL_READING, - FL_WRITING, - FL_ERASING, - FL_SYNCING, - FL_CACHEDPRG, - FL_RESETING, - FL_UNLOCKING, - FL_LOCKING, - FL_PM_SUSPENDED, -}; - -/* If the erase fails, fail_addr might indicate exactly which block failed. If - fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not - specific to any particular block. */ struct erase_info { struct mtd_info *mtd; uint64_t addr; @@ -50,8 +47,8 @@ struct erase_info { uint64_t fail_addr; u_long time; u_long retries; - u_int dev; - u_int cell; + unsigned dev; + unsigned cell; void (*callback) (struct erase_info *self); u_long priv; u_char state; @@ -60,9 +57,9 @@ struct erase_info { }; struct mtd_erase_region_info { - uint64_t offset; /* At which this region starts, from the beginning of the MTD */ - u_int32_t erasesize; /* For this region */ - u_int32_t numblocks; /* Number of blocks of erasesize in this region */ + uint64_t offset; /* At which this region starts, from the beginning of the MTD */ + uint32_t erasesize; /* For this region */ + uint32_t numblocks; /* Number of blocks of erasesize in this region */ unsigned long *lockmap; /* If keeping bitmap of locks */ }; @@ -81,7 +78,7 @@ struct mtd_erase_region_info { * @datbuf: data buffer - if NULL only oob data are read/written * @oobbuf: oob data buffer * - * Note, it is allowed to read more then one OOB area at one go, but not write. + * Note, it is allowed to read more than one OOB area at one go, but not write. * The interface assumes that the OOB write requests program only one page's * OOB area. */ @@ -109,26 +106,30 @@ struct mtd_oob_ops { #endif /* - * ECC layout control structure. Exported to userspace for - * diagnosis and to allow creation of raw images + * Internal ECC layout control structure. For historical reasons, there is a + * similar, smaller struct nand_ecclayout_user (in mtd-abi.h) that is retained + * for export to user-space via the ECCGETLAYOUT ioctl. + * nand_ecclayout should be expandable in the future simply by the above macros. */ struct nand_ecclayout { - uint32_t eccbytes; - uint32_t eccpos[MTD_MAX_ECCPOS_ENTRIES_LARGE]; - uint32_t oobavail; + __u32 eccbytes; + __u32 eccpos[MTD_MAX_ECCPOS_ENTRIES_LARGE]; + __u32 oobavail; struct nand_oobfree oobfree[MTD_MAX_OOBFREE_ENTRIES_LARGE]; }; +struct module; /* only needed for owner field in mtd_info */ + struct mtd_info { u_char type; - u_int32_t flags; - uint64_t size; /* Total size of the MTD */ + uint32_t flags; + uint64_t size; // Total size of the MTD /* "Major" erase size for the device. Naïve users may take this * to be the only erase size available, or may use the more detailed * information below if they desire */ - u_int32_t erasesize; + uint32_t erasesize; /* Minimal writable flash unit size. In case of NOR flash it is 1 (even * though individual bits can be cleared), in case of NAND flash it is * one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR @@ -136,10 +137,31 @@ struct mtd_info { * Any driver registering a struct mtd_info must ensure a writesize of * 1 or larger. */ - u_int32_t writesize; + uint32_t writesize; + + /* + * Size of the write buffer used by the MTD. MTD devices having a write + * buffer can write multiple writesize chunks at a time. E.g. while + * writing 4 * writesize bytes to a device with 2 * writesize bytes + * buffer the MTD driver can (but doesn't have to) do 2 writesize + * operations, but not 4. Currently, all NANDs have writebufsize + * equivalent to writesize (NAND page size). Some NOR flashes do have + * writebufsize greater than writesize. + */ + uint32_t writebufsize; - u_int32_t oobsize; /* Amount of OOB data per block (e.g. 16) */ - u_int32_t oobavail; /* Available OOB bytes per block */ + uint32_t oobsize; // Amount of OOB data per block (e.g. 16) + uint32_t oobavail; // Available OOB bytes per block + + /* + * If erasesize is a power of 2 then the shift is stored in + * erasesize_shift otherwise erasesize_shift is zero. Ditto writesize. + */ + unsigned int erasesize_shift; + unsigned int writesize_shift; + /* Masks based on erasesize_shift and writesize_shift */ + unsigned int erasesize_mask; + unsigned int writesize_mask; /* * read ops return -EUCLEAN if max number of bitflips corrected on any @@ -150,13 +172,20 @@ struct mtd_info { */ unsigned int bitflip_threshold; - /* Kernel-only stuff starts here. */ + // Kernel-only stuff starts here. +#ifndef __UBOOT__ const char *name; +#else + char *name; +#endif int index; /* ECC layout structure pointer - read only! */ struct nand_ecclayout *ecclayout; + /* the ecc step size. */ + unsigned int ecc_step_size; + /* max number of correctible bit errors per ecc step */ unsigned int ecc_strength; @@ -171,44 +200,51 @@ struct mtd_info { * wrappers instead. */ int (*_erase) (struct mtd_info *mtd, struct erase_info *instr); +#ifndef __UBOOT__ int (*_point) (struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, void **virt, phys_addr_t *phys); - void (*_unpoint) (struct mtd_info *mtd, loff_t from, size_t len); + size_t *retlen, void **virt, resource_size_t *phys); + int (*_unpoint) (struct mtd_info *mtd, loff_t from, size_t len); +#endif + unsigned long (*_get_unmapped_area) (struct mtd_info *mtd, + unsigned long len, + unsigned long offset, + unsigned long flags); int (*_read) (struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, u_char *buf); + size_t *retlen, u_char *buf); int (*_write) (struct mtd_info *mtd, loff_t to, size_t len, - size_t *retlen, const u_char *buf); - - /* In blackbox flight recorder like scenarios we want to make successful - writes in interrupt context. panic_write() is only intended to be - called when its known the kernel is about to panic and we need the - write to succeed. Since the kernel is not going to be running for much - longer, this function can break locks and delay to ensure the write - succeeds (but not sleep). */ - - int (*_panic_write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf); - + size_t *retlen, const u_char *buf); + int (*_panic_write) (struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf); int (*_read_oob) (struct mtd_info *mtd, loff_t from, - struct mtd_oob_ops *ops); + struct mtd_oob_ops *ops); int (*_write_oob) (struct mtd_info *mtd, loff_t to, - struct mtd_oob_ops *ops); + struct mtd_oob_ops *ops); int (*_get_fact_prot_info) (struct mtd_info *mtd, struct otp_info *buf, - size_t len); + size_t len); int (*_read_fact_prot_reg) (struct mtd_info *mtd, loff_t from, - size_t len, size_t *retlen, u_char *buf); + size_t len, size_t *retlen, u_char *buf); int (*_get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf, - size_t len); + size_t len); int (*_read_user_prot_reg) (struct mtd_info *mtd, loff_t from, - size_t len, size_t *retlen, u_char *buf); - int (*_write_user_prot_reg) (struct mtd_info *mtd, loff_t to, size_t len, - size_t *retlen, u_char *buf); + size_t len, size_t *retlen, u_char *buf); + int (*_write_user_prot_reg) (struct mtd_info *mtd, loff_t to, + size_t len, size_t *retlen, u_char *buf); int (*_lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, - size_t len); + size_t len); +#ifndef __UBOOT__ + int (*_writev) (struct mtd_info *mtd, const struct kvec *vecs, + unsigned long count, loff_t to, size_t *retlen); +#endif void (*_sync) (struct mtd_info *mtd); int (*_lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*_unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); + int (*_is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*_block_isbad) (struct mtd_info *mtd, loff_t ofs); int (*_block_markbad) (struct mtd_info *mtd, loff_t ofs); +#ifndef __UBOOT__ + int (*_suspend) (struct mtd_info *mtd); + void (*_resume) (struct mtd_info *mtd); +#endif /* * If the driver is something smart, like UBI, it may need to maintain * its own reference counting. The below functions are only for driver. @@ -216,16 +252,12 @@ struct mtd_info { int (*_get_device) (struct mtd_info *mtd); void (*_put_device) (struct mtd_info *mtd); -/* XXX U-BOOT XXX */ -#if 0 - /* kvec-based read/write methods. - NB: The 'count' parameter is the number of _vectors_, each of - which contains an (ofs, len) tuple. - */ - int (*writev) (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen); -#endif -/* XXX U-BOOT XXX */ -#if 0 +#ifndef __UBOOT__ + /* Backing device capabilities for this device + * - provides mmap capabilities + */ + struct backing_dev_info *backing_dev_info; + struct notifier_block reboot_notifier; /* default mode before reboot */ #endif @@ -237,10 +269,20 @@ struct mtd_info { void *priv; struct module *owner; +#ifndef __UBOOT__ + struct device dev; +#endif int usecount; }; int mtd_erase(struct mtd_info *mtd, struct erase_info *instr); +#ifndef __UBOOT__ +int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, + void **virt, resource_size_t *phys); +int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len); +#endif +unsigned long mtd_get_unmapped_area(struct mtd_info *mtd, unsigned long len, + unsigned long offset, unsigned long flags); int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, @@ -273,8 +315,7 @@ int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, u_char *buf); int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len); -/* XXX U-BOOT XXX */ -#if 0 +#ifndef __UBOOT__ int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen); #endif @@ -291,22 +332,59 @@ int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len); int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs); int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs); +#ifndef __UBOOT__ +static inline int mtd_suspend(struct mtd_info *mtd) +{ + return mtd->_suspend ? mtd->_suspend(mtd) : 0; +} + +static inline void mtd_resume(struct mtd_info *mtd) +{ + if (mtd->_resume) + mtd->_resume(mtd); +} +#endif + static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) { + if (mtd->erasesize_shift) + return sz >> mtd->erasesize_shift; do_div(sz, mtd->erasesize); return sz; } static inline uint32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) { + if (mtd->erasesize_shift) + return sz & mtd->erasesize_mask; return do_div(sz, mtd->erasesize); } +static inline uint32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->writesize_shift) + return sz >> mtd->writesize_shift; + do_div(sz, mtd->writesize); + return sz; +} + +static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) +{ + if (mtd->writesize_shift) + return sz & mtd->writesize_mask; + return do_div(sz, mtd->writesize); +} + static inline int mtd_has_oob(const struct mtd_info *mtd) { return mtd->_read_oob && mtd->_write_oob; } +static inline int mtd_type_is_nand(const struct mtd_info *mtd) +{ + return mtd->type == MTD_NANDFLASH || mtd->type == MTD_MLCNANDFLASH; +} + static inline int mtd_can_have_bb(const struct mtd_info *mtd) { return !!mtd->_block_isbad; @@ -314,27 +392,36 @@ static inline int mtd_can_have_bb(const struct mtd_info *mtd) /* Kernel-side ioctl definitions */ -extern int add_mtd_device(struct mtd_info *mtd); -extern int del_mtd_device (struct mtd_info *mtd); - +struct mtd_partition; +struct mtd_part_parser_data; + +extern int mtd_device_parse_register(struct mtd_info *mtd, + const char * const *part_probe_types, + struct mtd_part_parser_data *parser_data, + const struct mtd_partition *defparts, + int defnr_parts); +#define mtd_device_register(master, parts, nr_parts) \ + mtd_device_parse_register(master, NULL, NULL, parts, nr_parts) +extern int mtd_device_unregister(struct mtd_info *master); extern struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num); +extern int __get_mtd_device(struct mtd_info *mtd); +extern void __put_mtd_device(struct mtd_info *mtd); extern struct mtd_info *get_mtd_device_nm(const char *name); - extern void put_mtd_device(struct mtd_info *mtd); -extern void mtd_get_len_incl_bad(struct mtd_info *mtd, uint64_t offset, - const uint64_t length, uint64_t *len_incl_bad, - int *truncated); -/* XXX U-BOOT XXX */ -#if 0 + + +#ifndef __UBOOT__ struct mtd_notifier { void (*add)(struct mtd_info *mtd); void (*remove)(struct mtd_info *mtd); struct list_head list; }; + extern void register_mtd_user (struct mtd_notifier *new); extern int unregister_mtd_user (struct mtd_notifier *old); #endif +void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size); #ifdef CONFIG_MTD_PARTITIONS void mtd_erase_callback(struct erase_info *instr); @@ -346,6 +433,7 @@ static inline void mtd_erase_callback(struct erase_info *instr) } #endif +#ifdef __UBOOT__ /* * Debugging macro and defines */ @@ -372,7 +460,11 @@ static inline void mtd_erase_callback(struct erase_info *instr) #define pr_info(args...) MTDDEBUG(MTD_DEBUG_LEVEL0, args) #define pr_warn(args...) MTDDEBUG(MTD_DEBUG_LEVEL0, args) #define pr_err(args...) MTDDEBUG(MTD_DEBUG_LEVEL0, args) - +#define pr_crit(args...) MTDDEBUG(MTD_DEBUG_LEVEL0, args) +#define pr_cont(args...) MTDDEBUG(MTD_DEBUG_LEVEL0, args) +#define pr_notice(args...) MTDDEBUG(MTD_DEBUG_LEVEL0, args) +#endif + static inline int mtd_is_bitflip(int err) { return err == -EUCLEAN; } @@ -385,4 +477,10 @@ static inline int mtd_is_bitflip_or_eccerr(int err) { return mtd_is_bitflip(err) || mtd_is_eccerr(err); } +#ifdef __UBOOT__ +/* drivers/mtd/mtdcore.h */ +int add_mtd_device(struct mtd_info *mtd); +int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); +int del_mtd_partitions(struct mtd_info *); +#endif #endif /* __MTD_MTD_H__ */ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 991bd8e..489c703 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -5,9 +5,7 @@ * Steven J. Hill <sjhill@realitydiluted.com> * Thomas Gleixner <tglx@linutronix.de> * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * SPDX-License-Identifier: GPL-2.0+ * * Info: * Contains standard defines and IDs for NAND flash devices @@ -18,21 +16,32 @@ #ifndef __LINUX_MTD_NAND_H #define __LINUX_MTD_NAND_H +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/wait.h> +#include <linux/spinlock.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/flashchip.h> +#include <linux/mtd/bbm.h> +#else #include "config.h" #include "linux/compat.h" #include "linux/mtd/mtd.h" +#include "linux/mtd/flashchip.h" #include "linux/mtd/bbm.h" - +#endif struct mtd_info; struct nand_flash_dev; /* Scan and identify a NAND device */ -extern int nand_scan (struct mtd_info *mtd, int max_chips); -/* Separate phases of nand_scan(), allowing board driver to intervene - * and override command or ECC setup according to flash type */ +extern int nand_scan(struct mtd_info *mtd, int max_chips); +/* + * Separate phases of nand_scan(), allowing board driver to intervene + * and override command or ECC setup according to flash type. + */ extern int nand_scan_ident(struct mtd_info *mtd, int max_chips, - const struct nand_flash_dev *table); + struct nand_flash_dev *table); extern int nand_scan_tail(struct mtd_info *mtd); /* Free resources held by the NAND device */ @@ -41,12 +50,23 @@ extern void nand_release(struct mtd_info *mtd); /* Internal helper for board drivers which need to override command function */ extern void nand_wait_ready(struct mtd_info *mtd); +#ifndef __UBOOT__ +/* locks all blocks present in the device */ +extern int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); + +/* unlocks specified locked blocks */ +extern int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); + +/* The maximum number of NAND chips in an array */ +#define NAND_MAX_CHIPS 8 +#endif + /* * This constant declares the max. oobsize / page, which * is supported now. If you add a chip with bigger oobsize/page * adjust this accordingly. */ -#define NAND_MAX_OOBSIZE 640 +#define NAND_MAX_OOBSIZE 744 #define NAND_MAX_PAGESIZE 8192 /* @@ -76,7 +96,6 @@ extern void nand_wait_ready(struct mtd_info *mtd); #define NAND_CMD_READOOB 0x50 #define NAND_CMD_ERASE1 0x60 #define NAND_CMD_STATUS 0x70 -#define NAND_CMD_STATUS_MULTI 0x71 #define NAND_CMD_SEQIN 0x80 #define NAND_CMD_RNDIN 0x85 #define NAND_CMD_READID 0x90 @@ -87,10 +106,8 @@ extern void nand_wait_ready(struct mtd_info *mtd); #define NAND_CMD_RESET 0xff #define NAND_CMD_LOCK 0x2a -#define NAND_CMD_LOCK_TIGHT 0x2c #define NAND_CMD_UNLOCK1 0x23 #define NAND_CMD_UNLOCK2 0x24 -#define NAND_CMD_LOCK_STATUS 0x7a /* Extended commands for large page devices */ #define NAND_CMD_READSTART 0x30 @@ -164,21 +181,12 @@ typedef enum { /* Chip has copy back function */ #define NAND_COPYBACK 0x00000010 /* - * AND Chip which has 4 banks and a confusing page / block - * assignment. See Renesas datasheet for further information. + * Chip requires ready check on read (for auto-incremented sequential read). + * True only for small page devices; large page devices do not support + * autoincrement. */ -#define NAND_IS_AND 0x00000020 -/* - * Chip has a array of 4 pages which can be read without - * additional ready /busy waits. - */ -#define NAND_4PAGE_ARRAY 0x00000040 -/* - * Chip requires that BBT is periodically rewritten to prevent - * bits from adjacent blocks from 'leaking' in altering data. - * This happens with the Renesas AG-AND chips, possibly others. - */ -#define BBT_AUTO_REFRESH 0x00000080 +#define NAND_NEED_READRDY 0x00000100 + /* Chip does not allow subpage writes */ #define NAND_NO_SUBPAGE_WRITE 0x00000200 @@ -189,16 +197,13 @@ typedef enum { #define NAND_ROM 0x00000800 /* Device supports subpage reads */ -#define NAND_SUBPAGE_READ 0x00001000 +#define NAND_SUBPAGE_READ 0x00001000 /* Options valid for Samsung large page devices */ -#define NAND_SAMSUNG_LP_OPTIONS \ - (NAND_NO_PADDING | NAND_CACHEPRG | NAND_COPYBACK) +#define NAND_SAMSUNG_LP_OPTIONS NAND_CACHEPRG /* Macros to identify the above */ -#define NAND_MUST_PAD(chip) (!(chip->options & NAND_NO_PADDING)) #define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG)) -#define NAND_HAS_COPYBACK(chip) ((chip->options & NAND_COPYBACK)) #define NAND_HAS_SUBPAGE_READ(chip) ((chip->options & NAND_SUBPAGE_READ)) /* Non chip related options */ @@ -211,6 +216,13 @@ typedef enum { #define NAND_OWN_BUFFERS 0x00020000 /* Chip may not exist, so silence any errors in scan */ #define NAND_SCAN_SILENT_NODEV 0x00040000 +/* + * Autodetect nand buswidth with readid/onfi. + * This suppose the driver will configure the hardware in 8 bits mode + * when calling nand_scan_ident, and update its configuration + * before calling nand_scan_tail. + */ +#define NAND_BUSWIDTH_AUTO 0x00080000 /* Options set by nand scan */ /* bbt has already been read */ @@ -221,10 +233,15 @@ typedef enum { /* Cell info constants */ #define NAND_CI_CHIPNR_MSK 0x03 #define NAND_CI_CELLTYPE_MSK 0x0C +#define NAND_CI_CELLTYPE_SHIFT 2 /* Keep gcc happy */ struct nand_chip; +/* ONFI features */ +#define ONFI_FEATURE_16_BIT_BUS (1 << 0) +#define ONFI_FEATURE_EXT_PARAM_PAGE (1 << 7) + /* ONFI timing mode, used in both asynchronous and synchronous mode */ #define ONFI_TIMING_MODE_0 (1 << 0) #define ONFI_TIMING_MODE_1 (1 << 1) @@ -237,9 +254,15 @@ struct nand_chip; /* ONFI feature address */ #define ONFI_FEATURE_ADDR_TIMING_MODE 0x1 +/* Vendor-specific feature address (Micron) */ +#define ONFI_FEATURE_ADDR_READ_RETRY 0x89 + /* ONFI subfeature parameters length */ #define ONFI_SUBFEATURE_PARAM_LEN 4 +/* ONFI optional commands SET/GET FEATURES supported? */ +#define ONFI_OPT_CMD_SET_GET_FEATURES (1 << 2) + struct nand_onfi_params { /* rev info and features block */ /* 'O' 'N' 'F' 'I' */ @@ -247,7 +270,10 @@ struct nand_onfi_params { __le16 revision; __le16 features; __le16 opt_cmd; - u8 reserved[22]; + u8 reserved0[2]; + __le16 ext_param_page_length; /* since ONFI 2.1 */ + u8 num_of_param_pages; /* since ONFI 2.1 */ + u8 reserved1[17]; /* manufacturer information block */ char manufacturer[12]; @@ -291,19 +317,74 @@ struct nand_onfi_params { __le16 io_pin_capacitance_typ; __le16 input_pin_capacitance_typ; u8 input_pin_capacitance_max; - u8 driver_strenght_support; + u8 driver_strength_support; __le16 t_int_r; __le16 t_ald; u8 reserved4[7]; /* vendor */ - u8 reserved5[90]; + __le16 vendor_revision; + u8 vendor[88]; __le16 crc; -} __attribute__((packed)); +} __packed; #define ONFI_CRC_BASE 0x4F4E +/* Extended ECC information Block Definition (since ONFI 2.1) */ +struct onfi_ext_ecc_info { + u8 ecc_bits; + u8 codeword_size; + __le16 bb_per_lun; + __le16 block_endurance; + u8 reserved[2]; +} __packed; + +#define ONFI_SECTION_TYPE_0 0 /* Unused section. */ +#define ONFI_SECTION_TYPE_1 1 /* for additional sections. */ +#define ONFI_SECTION_TYPE_2 2 /* for ECC information. */ +struct onfi_ext_section { + u8 type; + u8 length; +} __packed; + +#define ONFI_EXT_SECTION_MAX 8 + +/* Extended Parameter Page Definition (since ONFI 2.1) */ +struct onfi_ext_param_page { + __le16 crc; + u8 sig[4]; /* 'E' 'P' 'P' 'S' */ + u8 reserved0[10]; + struct onfi_ext_section sections[ONFI_EXT_SECTION_MAX]; + + /* + * The actual size of the Extended Parameter Page is in + * @ext_param_page_length of nand_onfi_params{}. + * The following are the variable length sections. + * So we do not add any fields below. Please see the ONFI spec. + */ +} __packed; + +struct nand_onfi_vendor_micron { + u8 two_plane_read; + u8 read_cache; + u8 read_unique_id; + u8 dq_imped; + u8 dq_imped_num_settings; + u8 dq_imped_feat_addr; + u8 rb_pulldown_strength; + u8 rb_pulldown_strength_feat_addr; + u8 rb_pulldown_strength_num_settings; + u8 otp_mode; + u8 otp_page_start; + u8 otp_data_prot_addr; + u8 otp_num_pages; + u8 otp_feat_addr; + u8 read_retry_options; + u8 reserved[72]; + u8 param_revision; +} __packed; + /** * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices * @lock: protection lock @@ -313,12 +394,11 @@ struct nand_onfi_params { * when a hw controller is available. */ struct nand_hw_control { -/* XXX U-BOOT XXX */ -#if 0 - spinlock_t lock; + spinlock_t lock; + struct nand_chip *active; +#ifndef __UBOOT__ wait_queue_head_t wq; #endif - struct nand_chip *active; }; /** @@ -344,6 +424,7 @@ struct nand_hw_control { * any single ECC step, 0 if bitflips uncorrectable, -EIO hw error * @read_subpage: function to read parts of the page covered by ECC; * returns same as read_page() + * @write_subpage: function to write parts of the page covered by ECC. * @write_page: function to write a page according to the ECC generator * requirements. * @write_oob_raw: function to write chip OOB data without ECC @@ -375,6 +456,9 @@ struct nand_ecc_ctrl { uint8_t *buf, int oob_required, int page); int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offs, uint32_t len, uint8_t *buf); + int (*write_subpage)(struct mtd_info *mtd, struct nand_chip *chip, + uint32_t offset, uint32_t data_len, + const uint8_t *data_buf, int oob_required); int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required); int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, @@ -396,10 +480,16 @@ struct nand_ecc_ctrl { * consecutive order. */ struct nand_buffers { +#ifndef __UBOOT__ + uint8_t *ecccalc; + uint8_t *ecccode; + uint8_t *databuf; +#else uint8_t ecccalc[ALIGN(NAND_MAX_OOBSIZE, ARCH_DMA_MINALIGN)]; uint8_t ecccode[ALIGN(NAND_MAX_OOBSIZE, ARCH_DMA_MINALIGN)]; uint8_t databuf[ALIGN(NAND_MAX_PAGESIZE + NAND_MAX_OOBSIZE, ARCH_DMA_MINALIGN)]; +#endif }; /** @@ -410,13 +500,13 @@ struct nand_buffers { * flash device. * @read_byte: [REPLACEABLE] read one byte from the chip * @read_word: [REPLACEABLE] read one word from the chip + * @write_byte: [REPLACEABLE] write a single byte to the chip on the + * low 8 I/O lines * @write_buf: [REPLACEABLE] write data from the buffer to the chip * @read_buf: [REPLACEABLE] read data from the chip into the buffer - * @verify_buf: [REPLACEABLE] verify buffer contents against the chip - * data. * @select_chip: [REPLACEABLE] select chip nr - * @block_bad: [REPLACEABLE] check, if the block is bad - * @block_markbad: [REPLACEABLE] mark the block bad + * @block_bad: [REPLACEABLE] check if a block is bad, using OOB markers + * @block_markbad: [REPLACEABLE] mark a block bad * @cmd_ctrl: [BOARDSPECIFIC] hardwarespecific function for controlling * ALE/CLE/nCE. Also used to write command and address * @init_size: [BOARDSPECIFIC] hardwarespecific function for setting @@ -431,6 +521,8 @@ struct nand_buffers { * commands to the chip. * @waitfunc: [REPLACEABLE] hardwarespecific function for wait on * ready. + * @setup_read_retry: [FLASHSPECIFIC] flash (vendor) specific function for + * setting the read-retry mode. Mostly needed for MLC NAND. * @ecc: [BOARDSPECIFIC] ECC control structure * @buffers: buffer structure for read/write * @hwcontrol: platform-specific hardware control structure @@ -458,7 +550,13 @@ struct nand_buffers { * @badblockbits: [INTERN] minimum number of set bits in a good block's * bad block marker position; i.e., BBM == 11110111b is * not bad when badblockbits == 7 - * @cellinfo: [INTERN] MLC/multichip data from chip ident + * @bits_per_cell: [INTERN] number of bits per cell. i.e., 1 means SLC. + * @ecc_strength_ds: [INTERN] ECC correctability from the datasheet. + * Minimum amount of bit errors per @ecc_step_ds guaranteed + * to be correctable. If unknown, set to zero. + * @ecc_step_ds: [INTERN] ECC step required by the @ecc_strength_ds, + * also from the datasheet. It is the recommended ECC step + * size, if known; if unknown, set to zero. * @numchips: [INTERN] number of physical chips * @chipsize: [INTERN] the size of one chip for multichip arrays * @pagemask: [INTERN] page number mask = number of (pages / chip) - 1 @@ -471,9 +569,9 @@ struct nand_buffers { * non 0 if ONFI supported. * @onfi_params: [INTERN] holds the ONFI page parameter when ONFI is * supported, 0 otherwise. - * @onfi_set_features [REPLACEABLE] set the features for ONFI nand - * @onfi_get_features [REPLACEABLE] get the features for ONFI nand - * @ecclayout: [REPLACEABLE] the default ECC placement scheme + * @read_retries: [INTERN] the number of read retry modes supported + * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand + * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand * @bbt: [INTERN] bad block table pointer * @bbt_td: [REPLACEABLE] bad block table descriptor for flash * lookup. @@ -496,9 +594,14 @@ struct nand_chip { uint8_t (*read_byte)(struct mtd_info *mtd); u16 (*read_word)(struct mtd_info *mtd); + void (*write_byte)(struct mtd_info *mtd, uint8_t byte); void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len); - int (*verify_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); +#ifdef __UBOOT__ +#if defined(CONFIG_MTD_NAND_VERIFY_WRITE) + int (*verify_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); +#endif +#endif void (*select_chip)(struct mtd_info *mtd, int chip); int (*block_bad)(struct mtd_info *mtd, loff_t ofs, int getchip); int (*block_markbad)(struct mtd_info *mtd, loff_t ofs); @@ -514,12 +617,13 @@ struct nand_chip { int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state, int status, int page); int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required, int page, - int cached, int raw); + uint32_t offset, int data_len, const uint8_t *buf, + int oob_required, int page, int cached, int raw); int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); + int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode); int chip_delay; unsigned int options; @@ -535,20 +639,26 @@ struct nand_chip { int pagebuf; unsigned int pagebuf_bitflips; int subpagesize; - uint8_t cellinfo; + uint8_t bits_per_cell; + uint16_t ecc_strength_ds; + uint16_t ecc_step_ds; int badblockpos; int badblockbits; int onfi_version; #ifdef CONFIG_SYS_NAND_ONFI_DETECTION - struct nand_onfi_params onfi_params; + struct nand_onfi_params onfi_params; #endif - int state; + int read_retries; + + flstate_t state; uint8_t *oob_poi; struct nand_hw_control *controller; +#ifdef __UBOOT__ struct nand_ecclayout *ecclayout; +#endif struct nand_ecc_ctrl ecc; struct nand_buffers *buffers; @@ -577,26 +687,83 @@ struct nand_chip { #define NAND_MFR_AMD 0x01 #define NAND_MFR_MACRONIX 0xc2 #define NAND_MFR_EON 0x92 +#define NAND_MFR_SANDISK 0x45 +#define NAND_MFR_INTEL 0x89 + +/* The maximum expected count of bytes in the NAND ID sequence */ +#define NAND_MAX_ID_LEN 8 + +/* + * A helper for defining older NAND chips where the second ID byte fully + * defined the chip, including the geometry (chip size, eraseblock size, page + * size). All these chips have 512 bytes NAND page size. + */ +#define LEGACY_ID_NAND(nm, devid, chipsz, erasesz, opts) \ + { .name = (nm), {{ .dev_id = (devid) }}, .pagesize = 512, \ + .chipsize = (chipsz), .erasesize = (erasesz), .options = (opts) } + +/* + * A helper for defining newer chips which report their page size and + * eraseblock size via the extended ID bytes. + * + * The real difference between LEGACY_ID_NAND and EXTENDED_ID_NAND is that with + * EXTENDED_ID_NAND, manufacturers overloaded the same device ID so that the + * device ID now only represented a particular total chip size (and voltage, + * buswidth), and the page size, eraseblock size, and OOB size could vary while + * using the same device ID. + */ +#define EXTENDED_ID_NAND(nm, devid, chipsz, opts) \ + { .name = (nm), {{ .dev_id = (devid) }}, .chipsize = (chipsz), \ + .options = (opts) } + +#define NAND_ECC_INFO(_strength, _step) \ + { .strength_ds = (_strength), .step_ds = (_step) } +#define NAND_ECC_STRENGTH(type) ((type)->ecc.strength_ds) +#define NAND_ECC_STEP(type) ((type)->ecc.step_ds) /** * struct nand_flash_dev - NAND Flash Device ID Structure - * @name: Identify the device type - * @id: device ID code - * @pagesize: Pagesize in bytes. Either 256 or 512 or 0 - * If the pagesize is 0, then the real pagesize - * and the eraseize are determined from the - * extended id bytes in the chip - * @erasesize: Size of an erase block in the flash device. - * @chipsize: Total chipsize in Mega Bytes - * @options: Bitfield to store chip relevant options + * @name: a human-readable name of the NAND chip + * @dev_id: the device ID (the second byte of the full chip ID array) + * @mfr_id: manufecturer ID part of the full chip ID array (refers the same + * memory address as @id[0]) + * @dev_id: device ID part of the full chip ID array (refers the same memory + * address as @id[1]) + * @id: full device ID array + * @pagesize: size of the NAND page in bytes; if 0, then the real page size (as + * well as the eraseblock size) is determined from the extended NAND + * chip ID array) + * @chipsize: total chip size in MiB + * @erasesize: eraseblock size in bytes (determined from the extended ID if 0) + * @options: stores various chip bit options + * @id_len: The valid length of the @id. + * @oobsize: OOB size + * @ecc.strength_ds: The ECC correctability from the datasheet, same as the + * @ecc_strength_ds in nand_chip{}. + * @ecc.step_ds: The ECC step required by the @ecc.strength_ds, same as the + * @ecc_step_ds in nand_chip{}, also from the datasheet. + * For example, the "4bit ECC for each 512Byte" can be set with + * NAND_ECC_INFO(4, 512). */ struct nand_flash_dev { char *name; - int id; - unsigned long pagesize; - unsigned long chipsize; - unsigned long erasesize; - unsigned long options; + union { + struct { + uint8_t mfr_id; + uint8_t dev_id; + }; + uint8_t id[NAND_MAX_ID_LEN]; + }; + unsigned int pagesize; + unsigned int chipsize; + unsigned int erasesize; + unsigned int options; + uint16_t id_len; + uint16_t oobsize; + struct { + uint16_t strength_ds; + uint16_t step_ds; + } ecc; }; /** @@ -609,23 +776,25 @@ struct nand_manufacturers { char *name; }; -extern const struct nand_flash_dev nand_flash_ids[]; -extern const struct nand_manufacturers nand_manuf_ids[]; +extern struct nand_flash_dev nand_flash_ids[]; +extern struct nand_manufacturers nand_manuf_ids[]; extern int nand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd); -extern int nand_update_bbt(struct mtd_info *mtd, loff_t offs); extern int nand_default_bbt(struct mtd_info *mtd); +extern int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); extern int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); extern int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, int allowbbt); extern int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, uint8_t *buf); +#ifdef __UBOOT__ /* * Constants for oob configuration */ #define NAND_SMALL_BADBLOCK_POS 5 #define NAND_LARGE_BADBLOCK_POS 0 +#endif /** * struct platform_nand_chip - chip level device structure @@ -656,20 +825,29 @@ struct platform_device; /** * struct platform_nand_ctrl - controller level device structure + * @probe: platform specific function to probe/setup hardware + * @remove: platform specific function to remove/teardown hardware * @hwcontrol: platform specific hardware control structure * @dev_ready: platform specific function to read ready/busy pin * @select_chip: platform specific chip select function * @cmd_ctrl: platform specific function for controlling * ALE/CLE/nCE. Also used to write command and address + * @write_buf: platform specific function for write buffer + * @read_buf: platform specific function for read buffer + * @read_byte: platform specific function to read one byte from chip * @priv: private data to transport driver specific settings * * All fields are optional and depend on the hardware driver requirements */ struct platform_nand_ctrl { + int (*probe)(struct platform_device *pdev); + void (*remove)(struct platform_device *pdev); void (*hwcontrol)(struct mtd_info *mtd, int cmd); int (*dev_ready)(struct mtd_info *mtd); void (*select_chip)(struct mtd_info *mtd, int chip); void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); + void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); + void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len); unsigned char (*read_byte)(struct mtd_info *mtd); void *priv; }; @@ -693,16 +871,14 @@ struct platform_nand_chip *get_platform_nandchip(struct mtd_info *mtd) return chip->priv; } -/* Standard NAND functions from nand_base.c */ -void nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len); -void nand_write_buf16(struct mtd_info *mtd, const uint8_t *buf, int len); -void nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len); -void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len); -uint8_t nand_read_byte(struct mtd_info *mtd); +#ifdef CONFIG_SYS_NAND_ONFI_DETECTION +/* return the supported features. */ +static inline int onfi_feature(struct nand_chip *chip) +{ + return chip->onfi_version ? le16_to_cpu(chip->onfi_params.features) : 0; +} /* return the supported asynchronous timing mode. */ - -#ifdef CONFIG_SYS_NAND_ONFI_DETECTION static inline int onfi_get_async_timing_mode(struct nand_chip *chip) { if (!chip->onfi_version) @@ -719,6 +895,16 @@ static inline int onfi_get_sync_timing_mode(struct nand_chip *chip) } #endif +/* + * Check if it is a SLC nand. + * The !nand_is_slc() can be used to check the MLC/TLC nand chips. + * We do not distinguish the MLC and TLC now. + */ +static inline bool nand_is_slc(struct nand_chip *chip) +{ + return chip->bits_per_cell == 1; +} + /** * Check if the opcode's address should be sent only on the lower 8 bits * @command: opcode to check @@ -737,5 +923,12 @@ static inline int nand_opcode_8bits(unsigned int command) return 0; } - +#ifdef __UBOOT__ +/* Standard NAND functions from nand_base.c */ +void nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len); +void nand_write_buf16(struct mtd_info *mtd, const uint8_t *buf, int len); +void nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len); +void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len); +uint8_t nand_read_byte(struct mtd_info *mtd); +#endif #endif /* __LINUX_MTD_NAND_H */ diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index d1d9a96..ce0e8db 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -1,11 +1,9 @@ /* * MTD partitioning layer definitions * - * (C) 2000 Nicolas Pitre <nico@cam.org> + * (C) 2000 Nicolas Pitre <nico@fluxnic.net> * * This code is GPL - * - * $Id: partitions.h,v 1.17 2005/11/07 11:14:55 gleixner Exp $ */ #ifndef MTD_PARTITIONS_H @@ -18,7 +16,7 @@ * Partition definition structure: * * An array of struct partition is passed along with a MTD object to - * add_mtd_partitions() to create them. + * mtd_device_register() to create them. * * For each partition, these fields are available: * name: string that will be used to label the partition's MTD device. @@ -26,7 +24,9 @@ * will extend to the end of the master MTD device. * offset: absolute starting position within the master MTD device; if * defined as MTDPART_OFS_APPEND, the partition will start where the - * previous one ended; if MTDPART_OFS_NXTBLK, at the next erase block. + * previous one ended; if MTDPART_OFS_NXTBLK, at the next erase block; + * if MTDPART_OFS_RETAIN, consume as much as possible, leaving size + * after the end of partition. * mask_flags: contains flags that have to be masked (removed) from the * master MTD flag set for the corresponding MTD partition. * For example, to force a read-only partition, simply adding @@ -37,23 +37,34 @@ */ struct mtd_partition { - char *name; /* identifier string */ + const char *name; /* identifier string */ uint64_t size; /* partition size */ uint64_t offset; /* offset within the master MTD space */ - u_int32_t mask_flags; /* master MTD flags to mask out for this partition */ - struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/ - struct mtd_info **mtdp; /* pointer to store the MTD object */ + uint32_t mask_flags; /* master MTD flags to mask out for this partition */ + struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only) */ }; +#define MTDPART_OFS_RETAIN (-3) #define MTDPART_OFS_NXTBLK (-2) #define MTDPART_OFS_APPEND (-1) #define MTDPART_SIZ_FULL (0) -int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); -int del_mtd_partitions(struct mtd_info *); +struct mtd_info; +struct device_node; + +#ifndef __UBOOT__ +/** + * struct mtd_part_parser_data - used to pass data to MTD partition parsers. + * @origin: for RedBoot, start address of MTD device + * @of_node: for OF parsers, device node containing partitioning information + */ +struct mtd_part_parser_data { + unsigned long origin; + struct device_node *of_node; +}; + -#if 0 /* * Functions dealing with the various ways of partitioning the space */ @@ -62,23 +73,18 @@ struct mtd_part_parser { struct list_head list; struct module *owner; const char *name; - int (*parse_fn)(struct mtd_info *, struct mtd_partition **, unsigned long); + int (*parse_fn)(struct mtd_info *, struct mtd_partition **, + struct mtd_part_parser_data *); }; -extern int register_mtd_parser(struct mtd_part_parser *parser); -extern int deregister_mtd_parser(struct mtd_part_parser *parser); -extern int parse_mtd_partitions(struct mtd_info *master, const char **types, - struct mtd_partition **pparts, unsigned long origin); - -#define put_partition_parser(p) do { module_put((p)->owner); } while(0) - -struct device; -struct device_node; - -int __devinit of_mtd_parse_partitions(struct device *dev, - struct mtd_info *mtd, - struct device_node *node, - struct mtd_partition **pparts); +extern void register_mtd_parser(struct mtd_part_parser *parser); +extern void deregister_mtd_parser(struct mtd_part_parser *parser); #endif +int mtd_is_partition(const struct mtd_info *mtd); +int mtd_add_partition(struct mtd_info *master, const char *name, + long long offset, long long length); +int mtd_del_partition(struct mtd_info *master, int partno); +uint64_t mtd_get_device_size(const struct mtd_info *mtd); + #endif diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index 4755770..d9e58ae 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -9,9 +9,15 @@ #ifndef __LINUX_UBI_H__ #define __LINUX_UBI_H__ -/* #include <asm/ioctl.h> */ #include <linux/types.h> +#define __UBOOT__ +#ifndef __UBOOT__ +#include <linux/ioctl.h> #include <mtd/ubi-user.h> +#endif + +/* All voumes/LEBs */ +#define UBI_ALL -1 /* * enum ubi_open_mode - UBI volume open mode constants. @@ -33,13 +39,13 @@ enum { * @size: how many physical eraseblocks are reserved for this volume * @used_bytes: how many bytes of data this volume contains * @used_ebs: how many physical eraseblocks of this volume actually contain any - * data + * data * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) * @corrupted: non-zero if the volume is corrupted (static volumes only) * @upd_marker: non-zero if the volume has update marker set * @alignment: volume alignment * @usable_leb_size: how many bytes are available in logical eraseblocks of - * this volume + * this volume * @name_len: volume name length * @name: volume name * @cdev: UBI volume character device major and minor numbers @@ -75,7 +81,7 @@ enum { * physical eraseblock size and on how much bytes UBI headers consume. But * because of the volume alignment (@alignment), the usable size of logical * eraseblocks if a volume may be less. The following equation is true: - * @usable_leb_size = LEB size - (LEB size mod @alignment), + * @usable_leb_size = LEB size - (LEB size mod @alignment), * where LEB size is the logical eraseblock size defined by the UBI device. * * The alignment is multiple to the minimal flash input/output unit size or %1 @@ -104,20 +110,79 @@ struct ubi_volume_info { * struct ubi_device_info - UBI device description data structure. * @ubi_num: ubi device number * @leb_size: logical eraseblock size on this UBI device + * @leb_start: starting offset of logical eraseblocks within physical + * eraseblocks * @min_io_size: minimal I/O unit size + * @max_write_size: maximum amount of bytes the underlying flash can write at a + * time (MTD write buffer size) * @ro_mode: if this device is in read-only mode * @cdev: UBI character device major and minor numbers * * Note, @leb_size is the logical eraseblock size offered by the UBI device. * Volumes of this UBI device may have smaller logical eraseblock size if their * alignment is not equivalent to %1. + * + * The @max_write_size field describes flash write maximum write unit. For + * example, NOR flash allows for changing individual bytes, so @min_io_size is + * %1. However, it does not mean than NOR flash has to write data byte-by-byte. + * Instead, CFI NOR flashes have a write-buffer of, e.g., 64 bytes, and when + * writing large chunks of data, they write 64-bytes at a time. Obviously, this + * improves write throughput. + * + * Also, the MTD device may have N interleaved (striped) flash chips + * underneath, in which case @min_io_size can be physical min. I/O size of + * single flash chip, while @max_write_size can be N * @min_io_size. + * + * The @max_write_size field is always greater or equivalent to @min_io_size. + * E.g., some NOR flashes may have (@min_io_size = 1, @max_write_size = 64). In + * contrast, NAND flashes usually have @min_io_size = @max_write_size = NAND + * page size. */ struct ubi_device_info { int ubi_num; int leb_size; + int leb_start; int min_io_size; + int max_write_size; int ro_mode; +#ifndef __UBOOT__ dev_t cdev; +#endif +}; + +/* + * Volume notification types. + * @UBI_VOLUME_ADDED: a volume has been added (an UBI device was attached or a + * volume was created) + * @UBI_VOLUME_REMOVED: a volume has been removed (an UBI device was detached + * or a volume was removed) + * @UBI_VOLUME_RESIZED: a volume has been re-sized + * @UBI_VOLUME_RENAMED: a volume has been re-named + * @UBI_VOLUME_UPDATED: data has been written to a volume + * + * These constants define which type of event has happened when a volume + * notification function is invoked. + */ +enum { + UBI_VOLUME_ADDED, + UBI_VOLUME_REMOVED, + UBI_VOLUME_RESIZED, + UBI_VOLUME_RENAMED, + UBI_VOLUME_UPDATED, +}; + +/* + * struct ubi_notification - UBI notification description structure. + * @di: UBI device description object + * @vi: UBI volume description object + * + * UBI notifiers are called with a pointer to an object of this type. The + * object describes the notification. Namely, it provides a description of the + * UBI device and UBI volume the notification informs about. + */ +struct ubi_notification { + struct ubi_device_info di; + struct ubi_volume_info vi; }; /* UBI descriptor given to users when they open UBI volumes */ @@ -129,17 +194,37 @@ void ubi_get_volume_info(struct ubi_volume_desc *desc, struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode); struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name, int mode); +struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode); + +#ifndef __UBOOT__ +typedef int (*notifier_fn_t)(void *nb, + unsigned long action, void *data); + +struct notifier_block { + notifier_fn_t notifier_call; + struct notifier_block *next; + void *next; + int priority; +}; + +int ubi_register_volume_notifier(struct notifier_block *nb, + int ignore_existing); +int ubi_unregister_volume_notifier(struct notifier_block *nb); +#endif + void ubi_close_volume(struct ubi_volume_desc *desc); int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, int len, int check); int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, - int offset, int len, int dtype); + int offset, int len); int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, - int len, int dtype); + int len); int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum); int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum); -int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); +int ubi_leb_map(struct ubi_volume_desc *desc, int lnum); int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum); +int ubi_sync(int ubi_num); +int ubi_flush(int ubi_num, int vol_id, int lnum); /* * This function is the same as the 'ubi_leb_read()' function, but it does not @@ -150,25 +235,4 @@ static inline int ubi_read(struct ubi_volume_desc *desc, int lnum, char *buf, { return ubi_leb_read(desc, lnum, buf, offset, len, 0); } - -/* - * This function is the same as the 'ubi_leb_write()' functions, but it does - * not have the data type argument. - */ -static inline int ubi_write(struct ubi_volume_desc *desc, int lnum, - const void *buf, int offset, int len) -{ - return ubi_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); -} - -/* - * This function is the same as the 'ubi_leb_change()' functions, but it does - * not have the data type argument. - */ -static inline int ubi_change(struct ubi_volume_desc *desc, int lnum, - const void *buf, int len) -{ - return ubi_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); -} - #endif /* !__LINUX_UBI_H__ */ diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h index ac3c298..b9f4bcb 100644 --- a/include/mtd/mtd-abi.h +++ b/include/mtd/mtd-abi.h @@ -1,30 +1,44 @@ /* - * $Id: mtd-abi.h,v 1.13 2005/11/07 11:14:56 gleixner Exp $ + * Copyright © 1999-2010 David Woodhouse <dwmw2@infradead.org> et al. + * + * SPDX-License-Identifier: GPL-2.0+ * - * Portions of MTD ABI definition which are shared by kernel and user space */ #ifndef __MTD_ABI_H__ #define __MTD_ABI_H__ -#if 1 +#define __UBOOT__ +#ifdef __UBOOT__ #include <linux/compat.h> #endif #include <linux/compiler.h> struct erase_info_user { - uint32_t start; - uint32_t length; + __u32 start; + __u32 length; +}; + +struct erase_info_user64 { + __u64 start; + __u64 length; }; struct mtd_oob_buf { - uint32_t start; - uint32_t length; + __u32 start; + __u32 length; unsigned char __user *ptr; }; -/* +struct mtd_oob_buf64 { + __u64 start; + __u32 pad; + __u32 length; + __u64 usr_ptr; +}; + +/** * MTD operation modes * * @MTD_OPS_PLACE_OOB: OOB data are placed at the given offset (default) @@ -43,18 +57,45 @@ enum { MTD_OPS_RAW = 2, }; +/** + * struct mtd_write_req - data structure for requesting a write operation + * + * @start: start address + * @len: length of data buffer + * @ooblen: length of OOB buffer + * @usr_data: user-provided data buffer + * @usr_oob: user-provided OOB buffer + * @mode: MTD mode (see "MTD operation modes") + * @padding: reserved, must be set to 0 + * + * This structure supports ioctl(MEMWRITE) operations, allowing data and/or OOB + * writes in various modes. To write to OOB-only, set @usr_data == NULL, and to + * write data-only, set @usr_oob == NULL. However, setting both @usr_data and + * @usr_oob to NULL is not allowed. + */ +struct mtd_write_req { + __u64 start; + __u64 len; + __u64 ooblen; + __u64 usr_data; + __u64 usr_oob; + __u8 mode; + __u8 padding[7]; +}; + #define MTD_ABSENT 0 #define MTD_RAM 1 #define MTD_ROM 2 #define MTD_NORFLASH 3 -#define MTD_NANDFLASH 4 +#define MTD_NANDFLASH 4 /* SLC NAND */ #define MTD_DATAFLASH 6 #define MTD_UBIVOLUME 7 +#define MTD_MLCNANDFLASH 8 /* MLC NAND (including TLC) */ #define MTD_WRITEABLE 0x400 /* Device is writeable */ #define MTD_BIT_WRITEABLE 0x800 /* Single bits can be flipped */ #define MTD_NO_ERASE 0x1000 /* No erase necessary */ -#define MTD_STUPID_LOCK 0x2000 /* Always locked after reset */ +#define MTD_POWERUP_LOCK 0x2000 /* Always locked after reset */ /* Some common devices / combinations of capabilities */ #define MTD_CAP_ROM 0 @@ -62,12 +103,12 @@ enum { #define MTD_CAP_NORFLASH (MTD_WRITEABLE | MTD_BIT_WRITEABLE) #define MTD_CAP_NANDFLASH (MTD_WRITEABLE) -/* ECC byte placement */ -#define MTD_NANDECC_OFF 0 /* Switch off ECC (Not recommended) */ -#define MTD_NANDECC_PLACE 1 /* Use the given placement in the structure (YAFFS1 legacy mode) */ -#define MTD_NANDECC_AUTOPLACE 2 /* Use the default placement scheme */ -#define MTD_NANDECC_PLACEONLY 3 /* Use the given placement in the structure (Do not store ecc result on read) */ -#define MTD_NANDECC_AUTOPL_USR 4 /* Use the given autoplacement scheme rather than using the default */ +/* Obsolete ECC byte placement modes (used with obsolete MEMGETOOBSEL) */ +#define MTD_NANDECC_OFF 0 // Switch off ECC (Not recommended) +#define MTD_NANDECC_PLACE 1 // Use the given placement in the structure (YAFFS1 legacy mode) +#define MTD_NANDECC_AUTOPLACE 2 // Use the default placement scheme +#define MTD_NANDECC_PLACEONLY 3 // Use the given placement in the structure (Do not store ecc result on read) +#define MTD_NANDECC_AUTOPL_USR 4 // Use the given autoplacement scheme rather than using the default /* OTP mode selection */ #define MTD_OTP_OFF 0 @@ -75,32 +116,35 @@ enum { #define MTD_OTP_USER 2 struct mtd_info_user { - uint8_t type; - uint32_t flags; - uint32_t size; /* Total size of the MTD */ - uint32_t erasesize; - uint32_t writesize; - uint32_t oobsize; /* Amount of OOB data per block (e.g. 16) */ - /* The below two fields are obsolete and broken, do not use them - * (TODO: remove at some point) */ - uint32_t ecctype; - uint32_t eccsize; + __u8 type; + __u32 flags; + __u32 size; /* Total size of the MTD */ + __u32 erasesize; + __u32 writesize; + __u32 oobsize; /* Amount of OOB data per block (e.g. 16) */ + __u64 padding; /* Old obsolete field; do not use */ }; struct region_info_user { - uint32_t offset; /* At which this region starts, - * from the beginning of the MTD */ - uint32_t erasesize; /* For this region */ - uint32_t numblocks; /* Number of blocks in this region */ - uint32_t regionindex; + __u32 offset; /* At which this region starts, + * from the beginning of the MTD */ + __u32 erasesize; /* For this region */ + __u32 numblocks; /* Number of blocks in this region */ + __u32 regionindex; }; struct otp_info { - uint32_t start; - uint32_t length; - uint32_t locked; + __u32 start; + __u32 length; + __u32 locked; }; +/* + * Note, the following ioctl existed in the past and was removed: + * #define MEMSETOOBSEL _IOW('M', 9, struct nand_oobinfo) + * Try to avoid adding a new ioctl with the same ioctl number. + */ + /* Get basic MTD characteristics info (better to use sysfs) */ #define MEMGETINFO _IOR('M', 1, struct mtd_info_user) /* Erase segment of MTD */ @@ -118,12 +162,11 @@ struct otp_info { /* Get information about the erase region for a specific index */ #define MEMGETREGIONINFO _IOWR('M', 8, struct region_info_user) /* Get info about OOB modes (e.g., RAW, PLACE, AUTO) - legacy interface */ -#define MEMSETOOBSEL _IOW('M', 9, struct nand_oobinfo) #define MEMGETOOBSEL _IOR('M', 10, struct nand_oobinfo) /* Check if an eraseblock is bad */ -#define MEMGETBADBLOCK _IOW('M', 11, loff_t) +#define MEMGETBADBLOCK _IOW('M', 11, __kernel_loff_t) /* Mark an eraseblock as bad */ -#define MEMSETBADBLOCK _IOW('M', 12, loff_t) +#define MEMSETBADBLOCK _IOW('M', 12, __kernel_loff_t) /* Set OTP (One-Time Programmable) mode (factory vs. user) */ #define OTPSELECT _IOR('M', 13, int) /* Get number of OTP (One-Time Programmable) regions */ @@ -133,26 +176,57 @@ struct otp_info { /* Lock a given range of user data (must be in mode %MTD_FILE_MODE_OTP_USER) */ #define OTPLOCK _IOR('M', 16, struct otp_info) /* Get ECC layout (deprecated) */ -#define ECCGETLAYOUT _IOR('M', 17, struct nand_ecclayout) +#define ECCGETLAYOUT _IOR('M', 17, struct nand_ecclayout_user) /* Get statistics about corrected/uncorrected errors */ #define ECCGETSTATS _IOR('M', 18, struct mtd_ecc_stats) /* Set MTD mode on a per-file-descriptor basis (see "MTD file modes") */ #define MTDFILEMODE _IO('M', 19) +/* Erase segment of MTD (supports 64-bit address) */ +#define MEMERASE64 _IOW('M', 20, struct erase_info_user64) +/* Write data to OOB (64-bit version) */ +#define MEMWRITEOOB64 _IOWR('M', 21, struct mtd_oob_buf64) +/* Read data from OOB (64-bit version) */ +#define MEMREADOOB64 _IOWR('M', 22, struct mtd_oob_buf64) +/* Check if chip is locked (for MTD that supports it) */ +#define MEMISLOCKED _IOR('M', 23, struct erase_info_user) +/* + * Most generic write interface; can write in-band and/or out-of-band in various + * modes (see "struct mtd_write_req"). This ioctl is not supported for flashes + * without OOB, e.g., NOR flash. + */ +#define MEMWRITE _IOWR('M', 24, struct mtd_write_req) /* * Obsolete legacy interface. Keep it in order not to break userspace * interfaces */ struct nand_oobinfo { - uint32_t useecc; - uint32_t eccbytes; - uint32_t oobfree[8][2]; - uint32_t eccpos[48]; + __u32 useecc; + __u32 eccbytes; + __u32 oobfree[8][2]; + __u32 eccpos[32]; }; struct nand_oobfree { - uint32_t offset; - uint32_t length; + __u32 offset; + __u32 length; +}; + +#define MTD_MAX_OOBFREE_ENTRIES 8 +#define MTD_MAX_ECCPOS_ENTRIES 64 +/* + * OBSOLETE: ECC layout control structure. Exported to user-space via ioctl + * ECCGETLAYOUT for backwards compatbility and should not be mistaken as a + * complete set of ECC information. The ioctl truncates the larger internal + * structure to retain binary compatibility with the static declaration of the + * ioctl. Note that the "MTD_MAX_..._ENTRIES" macros represent the max size of + * the user struct, not the MAX size of the internal struct nand_ecclayout. + */ +struct nand_ecclayout_user { + __u32 eccbytes; + __u32 eccpos[MTD_MAX_ECCPOS_ENTRIES]; + __u32 oobavail; + struct nand_oobfree oobfree[MTD_MAX_OOBFREE_ENTRIES]; }; /** @@ -164,10 +238,10 @@ struct nand_oobfree { * @bbtblocks: number of blocks reserved for bad block tables */ struct mtd_ecc_stats { - uint32_t corrected; - uint32_t failed; - uint32_t badblocks; - uint32_t bbtblocks; + __u32 corrected; + __u32 failed; + __u32 badblocks; + __u32 bbtblocks; }; /* @@ -188,10 +262,15 @@ struct mtd_ecc_stats { * used out of necessity (e.g., `write()', ioctl(MEMWRITEOOB64)). */ enum mtd_file_modes { - MTD_MODE_NORMAL = MTD_OTP_OFF, - MTD_MODE_OTP_FACTORY = MTD_OTP_FACTORY, - MTD_MODE_OTP_USER = MTD_OTP_USER, - MTD_MODE_RAW, + MTD_FILE_MODE_NORMAL = MTD_OTP_OFF, + MTD_FILE_MODE_OTP_FACTORY = MTD_OTP_FACTORY, + MTD_FILE_MODE_OTP_USER = MTD_OTP_USER, + MTD_FILE_MODE_RAW, }; +static inline int mtd_type_is_nand_user(const struct mtd_info_user *mtd) +{ + return mtd->type == MTD_NANDFLASH || mtd->type == MTD_MLCNANDFLASH; +} + #endif /* __MTD_ABI_H__ */ diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index 1ccc06e..c93914a 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -1,7 +1,7 @@ /* - * Copyright (c) International Business Machines Corp., 2006 + * Copyright © International Business Machines Corp., 2006 * - * SPDX-License-Identifier: GPL-2.0+ + * SPDX-License-Identifier: GPL-2.0+ * * Author: Artem Bityutskiy (Битюцкий Артём) */ @@ -9,6 +9,8 @@ #ifndef __UBI_USER_H__ #define __UBI_USER_H__ +#include <linux/types.h> + /* * UBI device creation (the same as MTD device attachment) * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -28,30 +30,37 @@ * UBI volume creation * ~~~~~~~~~~~~~~~~~~~ * - * UBI volumes are created via the %UBI_IOCMKVOL IOCTL command of UBI character + * UBI volumes are created via the %UBI_IOCMKVOL ioctl command of UBI character * device. A &struct ubi_mkvol_req object has to be properly filled and a - * pointer to it has to be passed to the IOCTL. + * pointer to it has to be passed to the ioctl. * * UBI volume deletion * ~~~~~~~~~~~~~~~~~~~ * - * To delete a volume, the %UBI_IOCRMVOL IOCTL command of the UBI character + * To delete a volume, the %UBI_IOCRMVOL ioctl command of the UBI character * device should be used. A pointer to the 32-bit volume ID hast to be passed - * to the IOCTL. + * to the ioctl. * * UBI volume re-size * ~~~~~~~~~~~~~~~~~~ * - * To re-size a volume, the %UBI_IOCRSVOL IOCTL command of the UBI character + * To re-size a volume, the %UBI_IOCRSVOL ioctl command of the UBI character * device should be used. A &struct ubi_rsvol_req object has to be properly - * filled and a pointer to it has to be passed to the IOCTL. + * filled and a pointer to it has to be passed to the ioctl. + * + * UBI volumes re-name + * ~~~~~~~~~~~~~~~~~~~ + * + * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command + * of the UBI character device should be used. A &struct ubi_rnvol_req object + * has to be properly filled and a pointer to it has to be passed to the ioctl. * * UBI volume update * ~~~~~~~~~~~~~~~~~ * - * Volume update should be done via the %UBI_IOCVOLUP IOCTL command of the + * Volume update should be done via the %UBI_IOCVOLUP ioctl command of the * corresponding UBI volume character device. A pointer to a 64-bit update - * size should be passed to the IOCTL. After this, UBI expects user to write + * size should be passed to the ioctl. After this, UBI expects user to write * this number of bytes to the volume character device. The update is finished * when the claimed number of bytes is passed. So, the volume update sequence * is something like: @@ -61,14 +70,58 @@ * write(fd, buf, image_size); * close(fd); * - * Atomic eraseblock change + * Logical eraseblock erase + * ~~~~~~~~~~~~~~~~~~~~~~~~ + * + * To erase a logical eraseblock, the %UBI_IOCEBER ioctl command of the + * corresponding UBI volume character device should be used. This command + * unmaps the requested logical eraseblock, makes sure the corresponding + * physical eraseblock is successfully erased, and returns. + * + * Atomic logical eraseblock change + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Atomic logical eraseblock change operation is called using the %UBI_IOCEBCH + * ioctl command of the corresponding UBI volume character device. A pointer to + * a &struct ubi_leb_change_req object has to be passed to the ioctl. Then the + * user is expected to write the requested amount of bytes (similarly to what + * should be done in case of the "volume update" ioctl). + * + * Logical eraseblock map + * ~~~~~~~~~~~~~~~~~~~~~ + * + * To map a logical eraseblock to a physical eraseblock, the %UBI_IOCEBMAP + * ioctl command should be used. A pointer to a &struct ubi_map_req object is + * expected to be passed. The ioctl maps the requested logical eraseblock to + * a physical eraseblock and returns. Only non-mapped logical eraseblocks can + * be mapped. If the logical eraseblock specified in the request is already + * mapped to a physical eraseblock, the ioctl fails and returns error. + * + * Logical eraseblock unmap * ~~~~~~~~~~~~~~~~~~~~~~~~ * - * Atomic eraseblock change operation is done via the %UBI_IOCEBCH IOCTL - * command of the corresponding UBI volume character device. A pointer to - * &struct ubi_leb_change_req has to be passed to the IOCTL. Then the user is - * expected to write the requested amount of bytes. This is similar to the - * "volume update" IOCTL. + * To unmap a logical eraseblock to a physical eraseblock, the %UBI_IOCEBUNMAP + * ioctl command should be used. The ioctl unmaps the logical eraseblocks, + * schedules corresponding physical eraseblock for erasure, and returns. Unlike + * the "LEB erase" command, it does not wait for the physical eraseblock being + * erased. Note, the side effect of this is that if an unclean reboot happens + * after the unmap ioctl returns, you may find the LEB mapped again to the same + * physical eraseblock after the UBI is run again. + * + * Check if logical eraseblock is mapped + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * To check if a logical eraseblock is mapped to a physical eraseblock, the + * %UBI_IOCEBISMAP ioctl command should be used. It returns %0 if the LEB is + * not mapped, and %1 if it is mapped. + * + * Set an UBI volume property + * ~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * To set an UBI volume property the %UBI_IOCSETPROP ioctl command should be + * used. A pointer to a &struct ubi_set_vol_prop_req object is expected to be + * passed. The object describes which property should be set, and to which value + * it should be set. */ /* @@ -82,56 +135,56 @@ /* Maximum volume name length */ #define UBI_MAX_VOLUME_NAME 127 -/* IOCTL commands of UBI character devices */ +/* ioctl commands of UBI character devices */ #define UBI_IOC_MAGIC 'o' /* Create an UBI volume */ #define UBI_IOCMKVOL _IOW(UBI_IOC_MAGIC, 0, struct ubi_mkvol_req) /* Remove an UBI volume */ -#define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t) +#define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, __s32) /* Re-size an UBI volume */ #define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req) +/* Re-name volumes */ +#define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req) -/* IOCTL commands of the UBI control character device */ +/* ioctl commands of the UBI control character device */ #define UBI_CTRL_IOC_MAGIC 'o' /* Attach an MTD device */ #define UBI_IOCATT _IOW(UBI_CTRL_IOC_MAGIC, 64, struct ubi_attach_req) /* Detach an MTD device */ -#define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, int32_t) +#define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, __s32) -/* IOCTL commands of UBI volume character devices */ +/* ioctl commands of UBI volume character devices */ #define UBI_VOL_IOC_MAGIC 'O' -/* Start UBI volume update */ -#define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, int64_t) -/* An eraseblock erasure command, used for debugging, disabled by default */ -#define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, int32_t) -/* An atomic eraseblock change command */ -#define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, int32_t) +/* Start UBI volume update + * Note: This actually takes a pointer (__s64*), but we can't change + * that without breaking the ABI on 32bit systems + */ +#define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, __s64) +/* LEB erasure command, used for debugging, disabled by default */ +#define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, __s32) +/* Atomic LEB change command */ +#define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, __s32) +/* Map LEB command */ +#define UBI_IOCEBMAP _IOW(UBI_VOL_IOC_MAGIC, 3, struct ubi_map_req) +/* Unmap LEB command */ +#define UBI_IOCEBUNMAP _IOW(UBI_VOL_IOC_MAGIC, 4, __s32) +/* Check if LEB is mapped command */ +#define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, __s32) +/* Set an UBI volume property */ +#define UBI_IOCSETVOLPROP _IOW(UBI_VOL_IOC_MAGIC, 6, \ + struct ubi_set_vol_prop_req) /* Maximum MTD device name length supported by UBI */ #define MAX_UBI_MTD_NAME_LEN 127 -/* - * UBI data type hint constants. - * - * UBI_LONGTERM: long-term data - * UBI_SHORTTERM: short-term data - * UBI_UNKNOWN: data persistence is unknown - * - * These constants are used when data is written to UBI volumes in order to - * help the UBI wear-leveling unit to find more appropriate physical - * eraseblocks. - */ -enum { - UBI_LONGTERM = 1, - UBI_SHORTTERM = 2, - UBI_UNKNOWN = 3, -}; +/* Maximum amount of UBI volumes that can be re-named at one go */ +#define UBI_MAX_RNVOL 32 /* * UBI volume type constants. @@ -144,11 +197,23 @@ enum { UBI_STATIC_VOLUME = 4, }; +/* + * UBI set volume property ioctl constants. + * + * @UBI_VOL_PROP_DIRECT_WRITE: allow (any non-zero value) or disallow (value 0) + * user to directly write and erase individual + * eraseblocks on dynamic volumes + */ +enum { + UBI_VOL_PROP_DIRECT_WRITE = 1, +}; + /** * struct ubi_attach_req - attach MTD device request. * @ubi_num: UBI device number to create * @mtd_num: MTD device number to attach * @vid_hdr_offset: VID header offset (use defaults if %0) + * @max_beb_per1024: maximum expected number of bad PEB per 1024 PEBs * @padding: reserved for future, not used, has to be zeroed * * This data structure is used to specify MTD device UBI has to attach and the @@ -164,20 +229,33 @@ enum { * it will be 512 in case of a 2KiB page NAND flash with 4 512-byte sub-pages. * * But in rare cases, if this optimizes things, the VID header may be placed to - * a different offset. For example, the boot-loader might do things faster if the - * VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. As - * the boot-loader would not normally need to read EC headers (unless it needs - * UBI in RW mode), it might be faster to calculate ECC. This is weird example, - * but it real-life example. So, in this example, @vid_hdr_offer would be - * 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes - * aligned, which is OK, as UBI is clever enough to realize this is 4th sub-page - * of the first page and add needed padding. + * a different offset. For example, the boot-loader might do things faster if + * the VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. + * As the boot-loader would not normally need to read EC headers (unless it + * needs UBI in RW mode), it might be faster to calculate ECC. This is weird + * example, but it real-life example. So, in this example, @vid_hdr_offer would + * be 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes + * aligned, which is OK, as UBI is clever enough to realize this is 4th + * sub-page of the first page and add needed padding. + * + * The @max_beb_per1024 is the maximum amount of bad PEBs UBI expects on the + * UBI device per 1024 eraseblocks. This value is often given in an other form + * in the NAND datasheet (min NVB i.e. minimal number of valid blocks). The + * maximum expected bad eraseblocks per 1024 is then: + * 1024 * (1 - MinNVB / MaxNVB) + * Which gives 20 for most NAND devices. This limit is used in order to derive + * amount of eraseblock UBI reserves for handling new bad blocks. If the device + * has more bad eraseblocks than this limit, UBI does not reserve any physical + * eraseblocks for new bad eraseblocks, but attempts to use available + * eraseblocks (if any). The accepted range is 0-768. If 0 is given, the + * default kernel value of %CONFIG_MTD_UBI_BEB_LIMIT will be used. */ struct ubi_attach_req { - int32_t ubi_num; - int32_t mtd_num; - int32_t vid_hdr_offset; - uint8_t padding[12]; + __s32 ubi_num; + __s32 mtd_num; + __s32 vid_hdr_offset; + __s16 max_beb_per1024; + __s8 padding[10]; }; /** @@ -212,15 +290,15 @@ struct ubi_attach_req { * BLOBs, without caring about how to properly align them. */ struct ubi_mkvol_req { - int32_t vol_id; - int32_t alignment; - int64_t bytes; - int8_t vol_type; - int8_t padding1; - int16_t name_len; - int8_t padding2[4]; + __s32 vol_id; + __s32 alignment; + __s64 bytes; + __s8 vol_type; + __s8 padding1; + __s16 name_len; + __s8 padding2[4]; char name[UBI_MAX_VOLUME_NAME + 1]; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_rsvol_req - a data structure used in volume re-size requests. @@ -229,28 +307,105 @@ struct ubi_mkvol_req { * * Re-sizing is possible for both dynamic and static volumes. But while dynamic * volumes may be re-sized arbitrarily, static volumes cannot be made to be - * smaller then the number of bytes they bear. To arbitrarily shrink a static + * smaller than the number of bytes they bear. To arbitrarily shrink a static * volume, it must be wiped out first (by means of volume update operation with * zero number of bytes). */ struct ubi_rsvol_req { - int64_t bytes; - int32_t vol_id; -} __attribute__ ((packed)); + __s64 bytes; + __s32 vol_id; +} __packed; /** - * struct ubi_leb_change_req - a data structure used in atomic logical - * eraseblock change requests. + * struct ubi_rnvol_req - volumes re-name request. + * @count: count of volumes to re-name + * @padding1: reserved for future, not used, has to be zeroed + * @vol_id: ID of the volume to re-name + * @name_len: name length + * @padding2: reserved for future, not used, has to be zeroed + * @name: new volume name + * + * UBI allows to re-name up to %32 volumes at one go. The count of volumes to + * re-name is specified in the @count field. The ID of the volumes to re-name + * and the new names are specified in the @vol_id and @name fields. + * + * The UBI volume re-name operation is atomic, which means that should power cut + * happen, the volumes will have either old name or new name. So the possible + * use-cases of this command is atomic upgrade. Indeed, to upgrade, say, volumes + * A and B one may create temporary volumes %A1 and %B1 with the new contents, + * then atomically re-name A1->A and B1->B, in which case old %A and %B will + * be removed. + * + * If it is not desirable to remove old A and B, the re-name request has to + * contain 4 entries: A1->A, A->A1, B1->B, B->B1, in which case old A1 and B1 + * become A and B, and old A and B will become A1 and B1. + * + * It is also OK to request: A1->A, A1->X, B1->B, B->Y, in which case old A1 + * and B1 become A and B, and old A and B become X and Y. + * + * In other words, in case of re-naming into an existing volume name, the + * existing volume is removed, unless it is re-named as well at the same + * re-name request. + */ +struct ubi_rnvol_req { + __s32 count; + __s8 padding1[12]; + struct { + __s32 vol_id; + __s16 name_len; + __s8 padding2[2]; + char name[UBI_MAX_VOLUME_NAME + 1]; + } ents[UBI_MAX_RNVOL]; +} __packed; + +/** + * struct ubi_leb_change_req - a data structure used in atomic LEB change + * requests. * @lnum: logical eraseblock number to change * @bytes: how many bytes will be written to the logical eraseblock - * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) + * @dtype: pass "3" for better compatibility with old kernels * @padding: reserved for future, not used, has to be zeroed + * + * The @dtype field used to inform UBI about what kind of data will be written + * to the LEB: long term (value 1), short term (value 2), unknown (value 3). + * UBI tried to pick a PEB with lower erase counter for short term data and a + * PEB with higher erase counter for long term data. But this was not really + * used because users usually do not know this and could easily mislead UBI. We + * removed this feature in May 2012. UBI currently just ignores the @dtype + * field. But for better compatibility with older kernels it is recommended to + * set @dtype to 3 (unknown). */ struct ubi_leb_change_req { - int32_t lnum; - int32_t bytes; - uint8_t dtype; - uint8_t padding[7]; -} __attribute__ ((packed)); + __s32 lnum; + __s32 bytes; + __s8 dtype; /* obsolete, do not use! */ + __s8 padding[7]; +} __packed; + +/** + * struct ubi_map_req - a data structure used in map LEB requests. + * @dtype: pass "3" for better compatibility with old kernels + * @lnum: logical eraseblock number to unmap + * @padding: reserved for future, not used, has to be zeroed + */ +struct ubi_map_req { + __s32 lnum; + __s8 dtype; /* obsolete, do not use! */ + __s8 padding[3]; +} __packed; + + +/** + * struct ubi_set_vol_prop_req - a data structure used to set an UBI volume + * property. + * @property: property to set (%UBI_VOL_PROP_DIRECT_WRITE) + * @padding: reserved for future, not used, has to be zeroed + * @value: value to set + */ +struct ubi_set_vol_prop_req { + __u8 property; + __u8 padding[7]; + __u64 value; +} __packed; #endif /* __UBI_USER_H__ */ diff --git a/include/usb/lin_gadget_compat.h b/include/usb/lin_gadget_compat.h index a25e9d9..29fb166 100644 --- a/include/usb/lin_gadget_compat.h +++ b/include/usb/lin_gadget_compat.h @@ -13,22 +13,6 @@ #include <linux/compat.h> /* common */ -#define spin_lock_init(...) -#define spin_lock(...) -#define spin_lock_irqsave(lock, flags) do { debug("%lu\n", flags); } while (0) -#define spin_unlock(...) -#define spin_unlock_irqrestore(lock, flags) do {flags = 0; } while (0) -#define disable_irq(...) -#define enable_irq(...) - -#define mutex_init(...) -#define mutex_lock(...) -#define mutex_unlock(...) - -#define GFP_KERNEL 0 - -#define IRQ_HANDLED 1 - #define ENOTSUPP 524 /* Operation is not supported */ #define BITS_PER_BYTE 8 |