From 886d518ca9a6b433736f74723813d8917e407f40 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 22 Sep 2024 16:14:12 -0700 Subject: [PATCH 01/33] MAINTAINERS: Add unsafe_memcpy() to the FORTIFY review list Usually it's possible to avoid adding an unsafe_memcpy() uses, so give the FORTIFY reviewers a chance to help avoid lying to the compiler about the destination buffer's type/size/etc. Signed-off-by: Kees Cook --- --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 36c0af94cf0861..3a29f123e1be3b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8772,6 +8772,7 @@ F: include/linux/fortify-string.h F: lib/fortify_kunit.c F: lib/memcpy_kunit.c F: lib/test_fortify/* +K: \bunsafe_memcpy\b K: \b__NO_FORTIFY\b FPGA DFL DRIVERS From dd3a7ee91e0ce0b03d22e974a79e8247cc99959b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 28 Sep 2024 11:13:13 -0700 Subject: [PATCH 02/33] hardening: Adjust dependencies in selection of MODVERSIONS MODVERSIONS recently grew a dependency on !COMPILE_TEST so that Rust could be more easily tested. However, this introduces a Kconfig warning when building allmodconfig with a clang version that supports RANDSTRUCT natively because RANDSTRUCT_FULL and RANDSTRUCT_PERFORMANCE select MODVERSIONS when MODULES is enabled, bypassing the !COMPILE_TEST dependency: WARNING: unmet direct dependencies detected for MODVERSIONS Depends on [n]: MODULES [=y] && !COMPILE_TEST [=y] Selected by [y]: - RANDSTRUCT_FULL [=y] && (CC_HAS_RANDSTRUCT [=y] || GCC_PLUGINS [=n]) && MODULES [=y] Add the !COMPILE_TEST dependency to the selections to clear up the warning. Fixes: 1f9c4a996756 ("Kbuild: make MODVERSIONS support depend on not being a compile test build") Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240928-fix-randstruct-modversions-kconfig-warning-v1-1-27d3edc8571e@kernel.org Signed-off-by: Kees Cook --- security/Kconfig.hardening | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 2cff851ebfd7e1..c9d5ca3d8d08de 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -340,7 +340,7 @@ choice config RANDSTRUCT_FULL bool "Fully randomize structure layout" depends on CC_HAS_RANDSTRUCT || GCC_PLUGINS - select MODVERSIONS if MODULES + select MODVERSIONS if MODULES && !COMPILE_TEST help Fully randomize the member layout of sensitive structures as much as possible, which may have both a @@ -356,7 +356,7 @@ choice config RANDSTRUCT_PERFORMANCE bool "Limit randomization of structure layout to cache-lines" depends on GCC_PLUGINS - select MODVERSIONS if MODULES + select MODVERSIONS if MODULES && !COMPILE_TEST help Randomization of sensitive kernel structures will make a best effort at restricting randomization to cacheline-sized From 045244dd5d75c61ae37b7b96fe0a95805bd1842d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 28 Sep 2024 11:26:09 -0700 Subject: [PATCH 03/33] MAINTAINERS: Add security/Kconfig.hardening to hardening section When running get_maintainer.pl on security/Kconfig.hardening, only the security subsystem folks show up, even though they have never taken patches to this file: $ scripts/get_maintainer.pl security/Kconfig.hardening Paul Moore <...> (supporter:SECURITY SUBSYSTEM) James Morris <...> (supporter:SECURITY SUBSYSTEM) "Serge E. Hallyn" <...> (supporter:SECURITY SUBSYSTEM) linux-security-module@vger.kernel.org (open list:SECURITY SUBSYSTEM) linux-kernel@vger.kernel.org (open list) $ git log --format=%cn --no-merges security/Kconfig.hardening | sort | uniq -c 3 Andrew Morton 1 Greg Kroah-Hartman 18 Kees Cook 2 Linus Torvald Add it to the hardening section so that the KSPP folks are also shown, which matches reality over who should comment on and take said patches if necessary. Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240928-maintainers-security-kconfig-hardening-v1-1-c8c64071cc02@kernel.org Signed-off-by: Kees Cook --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3a29f123e1be3b..fecc7c696737ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12152,6 +12152,7 @@ F: include/linux/randomize_kstack.h F: kernel/configs/hardening.config F: lib/usercopy_kunit.c F: mm/usercopy.c +F: security/Kconfig.hardening K: \b(add|choose)_random_kstack_offset\b K: \b__check_(object_size|heap_object)\b K: \b__counted_by\b From ee1e3c46ed19c096be22472c728fa7f68b1352c4 Mon Sep 17 00:00:00 2001 From: Ben Cheatham Date: Fri, 27 Sep 2024 11:34:28 -0500 Subject: [PATCH 04/33] EINJ, CXL: Fix CXL device SBDF calculation The SBDF of the target CXL 2.0 compliant root port is required to inject a CXL protocol error as per ACPI 6.5. The SBDF given has to be in the following format: 31 24 23 16 15 11 10 8 7 0 +-------------------------------------------------+ | segment | bus | device | function | reserved | +-------------------------------------------------+ The SBDF calculated in cxl_dport_get_sbdf() doesn't account for the reserved bits currently, causing the wrong SBDF to be used. Fix said calculation to properly shift the SBDF. Without this fix, error injection into CXL 2.0 root ports through the CXL debugfs interface (/cxl) is broken. Injection through the legacy interface (/apei/einj/) will still work because the SBDF is manually provided by the user. Fixes: 12fb28ea6b1cf ("EINJ: Add CXL error type support") Signed-off-by: Ben Cheatham Reviewed-by: Dan Williams Tested-by: Srinivasulu Thanneeru Reviewed-by: Srinivasulu Thanneeru Link: https://patch.msgid.link/20240927163428.366557-1-Benjamin.Cheatham@amd.com Signed-off-by: Ira Weiny --- drivers/acpi/apei/einj-cxl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/einj-cxl.c b/drivers/acpi/apei/einj-cxl.c index 4f81a119ec0896..a4e709937236f6 100644 --- a/drivers/acpi/apei/einj-cxl.c +++ b/drivers/acpi/apei/einj-cxl.c @@ -63,7 +63,7 @@ static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) seg = bridge->domain_nr; bus = pbus->number; - *sbdf = (seg << 24) | (bus << 16) | dport_dev->devfn; + *sbdf = (seg << 24) | (bus << 16) | (dport_dev->devfn << 8); return 0; } From b6e05ba0844139dde138625906015c974c86aa93 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 23 Sep 2024 12:00:13 +0800 Subject: [PATCH 05/33] spi: spi-imx: Fix pm_runtime_set_suspended() with runtime pm enabled It is not valid to call pm_runtime_set_suspended() for devices with runtime PM enabled because it returns -EAGAIN if it is enabled already and working. So, call pm_runtime_disable() before to fix it. Fixes: 43b6bf406cd0 ("spi: imx: fix runtime pm support for !CONFIG_PM") Signed-off-by: Jinjie Ruan Link: https://patch.msgid.link/20240923040015.3009329-2-ruanjinjie@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 85bd1a82a34eb4..4c31d36f3130a9 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1865,8 +1865,8 @@ static int spi_imx_probe(struct platform_device *pdev) spi_imx_sdma_exit(spi_imx); out_runtime_pm_put: pm_runtime_dont_use_autosuspend(spi_imx->dev); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(spi_imx->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(spi_imx->clk_ipg); out_put_per: From 67d4a70faa662df07451e83db1546d3ca0695e08 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 23 Sep 2024 12:00:14 +0800 Subject: [PATCH 06/33] spi: spi-cadence: Fix pm_runtime_set_suspended() with runtime pm enabled It is not valid to call pm_runtime_set_suspended() for devices with runtime PM enabled because it returns -EAGAIN if it is enabled already and working. So, call pm_runtime_disable() before to fix it. Fixes: d36ccd9f7ea4 ("spi: cadence: Runtime pm adaptation") Signed-off-by: Jinjie Ruan Link: https://patch.msgid.link/20240923040015.3009329-3-ruanjinjie@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index e07e081de5ea41..087e748d9cc958 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -678,8 +678,8 @@ static int cdns_spi_probe(struct platform_device *pdev) clk_dis_all: if (!spi_controller_is_target(ctlr)) { - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); } remove_ctlr: spi_controller_put(ctlr); @@ -701,8 +701,8 @@ static void cdns_spi_remove(struct platform_device *pdev) cdns_spi_write(xspi, CDNS_SPI_ER, CDNS_SPI_ER_DISABLE); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); spi_unregister_controller(ctlr); } From 3eae4a916fc0eb6f85b5d399e10335dbd24dd765 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 23 Sep 2024 12:00:15 +0800 Subject: [PATCH 07/33] spi: spi-cadence: Fix missing spi_controller_is_target() check The spi_controller_is_target() check is missing for pm_runtime_disable() in cdns_spi_remove(), add it. Fixes: b1b90514eaa3 ("spi: spi-cadence: Add support for Slave mode") Signed-off-by: Jinjie Ruan Link: https://patch.msgid.link/20240923040015.3009329-4-ruanjinjie@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 087e748d9cc958..3c87d2bf786a9e 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -701,8 +701,10 @@ static void cdns_spi_remove(struct platform_device *pdev) cdns_spi_write(xspi, CDNS_SPI_ER, CDNS_SPI_ER_DISABLE); - pm_runtime_disable(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); + if (!spi_controller_is_target(ctlr)) { + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + } spi_unregister_controller(ctlr); } From 162d9b5d2308c7e48efbc97d36babbf4d73b2c61 Mon Sep 17 00:00:00 2001 From: Alexander Dahl Date: Thu, 26 Sep 2024 11:03:56 +0200 Subject: [PATCH 08/33] spi: atmel-quadspi: Fix wrong register value written to MR aq->mr should go to MR, nothing else. Fixes: 329ca3eed4a9 ("spi: atmel-quadspi: Avoid overwriting delay register settings") Signed-off-by: Alexander Dahl Link: https://lore.kernel.org/linux-spi/20240926-macarena-wincing-7c4995487a29@thorsis.com/T/#u Link: https://patch.msgid.link/20240926090356.105789-1-ada@thorsis.com Signed-off-by: Mark Brown --- drivers/spi/atmel-quadspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index 4f288f07e38f91..95cdfc28361ef7 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -377,7 +377,7 @@ static int atmel_qspi_set_cfg(struct atmel_qspi *aq, */ if (!(aq->mr & QSPI_MR_SMM)) { aq->mr |= QSPI_MR_SMM; - atmel_qspi_write(aq->scr, aq, QSPI_MR); + atmel_qspi_write(aq->mr, aq, QSPI_MR); } /* Clear pending interrupts */ From 048bbbdbf85e5e00258dfb12f5e368f908801d7b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 30 Sep 2024 21:27:41 +0200 Subject: [PATCH 09/33] i2c: stm32f7: Do not prepare/unprepare clock during runtime suspend/resume In case there is any sort of clock controller attached to this I2C bus controller, for example Versaclock or even an AIC32x4 I2C codec, then an I2C transfer triggered from the clock controller clk_ops .prepare callback may trigger a deadlock on drivers/clk/clk.c prepare_lock mutex. This is because the clock controller first grabs the prepare_lock mutex and then performs the prepare operation, including its I2C access. The I2C access resumes this I2C bus controller via .runtime_resume callback, which calls clk_prepare_enable(), which attempts to grab the prepare_lock mutex again and deadlocks. Since the clock are already prepared since probe() and unprepared in remove(), use simple clk_enable()/clk_disable() calls to enable and disable the clock on runtime suspend and resume, to avoid hitting the prepare_lock mutex. Acked-by: Alain Volmat Signed-off-by: Marek Vasut Fixes: 4e7bca6fc07b ("i2c: i2c-stm32f7: add PM Runtime support") Cc: # v5.0+ Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-stm32f7.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index cfee2d9c09de36..0174ead99de6c1 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -2395,7 +2395,7 @@ static int __maybe_unused stm32f7_i2c_runtime_suspend(struct device *dev) struct stm32f7_i2c_dev *i2c_dev = dev_get_drvdata(dev); if (!stm32f7_i2c_is_slave_registered(i2c_dev)) - clk_disable_unprepare(i2c_dev->clk); + clk_disable(i2c_dev->clk); return 0; } @@ -2406,9 +2406,9 @@ static int __maybe_unused stm32f7_i2c_runtime_resume(struct device *dev) int ret; if (!stm32f7_i2c_is_slave_registered(i2c_dev)) { - ret = clk_prepare_enable(i2c_dev->clk); + ret = clk_enable(i2c_dev->clk); if (ret) { - dev_err(dev, "failed to prepare_enable clock\n"); + dev_err(dev, "failed to enable clock\n"); return ret; } } From 68a16708d2503b6303d67abd43801e2ca40c208d Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 24 Sep 2024 14:40:08 +0100 Subject: [PATCH 10/33] spi: s3c64xx: fix timeout counters in flush_fifo In the s3c64xx_flush_fifo() code, the loops counter is post-decremented in the do { } while(test && loops--) condition. This means the loops is left at the unsigned equivalent of -1 if the loop times out. The test after will never pass as if tests for loops == 0. Signed-off-by: Ben Dooks Fixes: 230d42d422e7 ("spi: Add s3c64xx SPI Controller driver") Reviewed-by: Andi Shyti Link: https://patch.msgid.link/20240924134009.116247-2-ben.dooks@codethink.co.uk Signed-off-by: Mark Brown --- drivers/spi/spi-s3c64xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 51a002b3f51887..8c9e5e97041f9c 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -245,7 +245,7 @@ static void s3c64xx_flush_fifo(struct s3c64xx_spi_driver_data *sdd) loops = msecs_to_loops(1); do { val = readl(regs + S3C64XX_SPI_STATUS); - } while (TX_FIFO_LVL(val, sdd) && loops--); + } while (TX_FIFO_LVL(val, sdd) && --loops); if (loops == 0) dev_warn(&sdd->pdev->dev, "Timed out flushing TX FIFO\n"); @@ -258,7 +258,7 @@ static void s3c64xx_flush_fifo(struct s3c64xx_spi_driver_data *sdd) readl(regs + S3C64XX_SPI_RX_DATA); else break; - } while (loops--); + } while (--loops); if (loops == 0) dev_warn(&sdd->pdev->dev, "Timed out flushing RX FIFO\n"); From e764e68103c12aef161480b8da984c36ca99cfb5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Sep 2024 18:46:48 -0400 Subject: [PATCH 11/33] bcachefs: Fix bad shift in bch2_read_flag_list() Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 42f565c7618171..e0a876cbaa6b78 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -222,7 +222,7 @@ u64 bch2_read_flag_list(const char *opt, const char * const list[]) break; } - ret |= 1 << flag; + ret |= BIT_ULL(flag); } kfree(d); From 9af48210ea5f1539e1999154b0acd343efdb370b Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 30 Sep 2024 11:06:50 +0200 Subject: [PATCH 12/33] xen: Fix config option reference in XEN_PRIVCMD definition Commit 2fae6bb7be32 ("xen/privcmd: Add new syscall to get gsi from dev") adds a weak reverse dependency to the config XEN_PRIVCMD definition, referring to CONFIG_XEN_PCIDEV_BACKEND. In Kconfig files, one refers to config options without the CONFIG prefix, though. So in its current form, this does not create the reverse dependency as intended, but is an attribute with no effect. Refer to the intended config option XEN_PCIDEV_BACKEND in the XEN_PRIVCMD definition. Fixes: 2fae6bb7be32 ("xen/privcmd: Add new syscall to get gsi from dev") Signed-off-by: Lukas Bulwahn Reviewed-by: Juergen Gross Message-ID: <20240930090650.429813-1-lukas.bulwahn@redhat.com> Signed-off-by: Juergen Gross --- drivers/xen/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 62035fe16bb8ac..72ddee4c1544df 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -261,7 +261,7 @@ config XEN_SCSI_BACKEND config XEN_PRIVCMD tristate "Xen hypercall passthrough driver" depends on XEN - imply CONFIG_XEN_PCIDEV_BACKEND + imply XEN_PCIDEV_BACKEND default m help The hypercall passthrough driver allows privileged user programs to From abaa6d4f6ab8371c5b73afb726ff1c012526e999 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 17:43:36 -0400 Subject: [PATCH 13/33] bcachefs: Fix return type of dirent_points_to_inode_nowarn() we're returning an error code now, not a bool Reported-by: Dan Carpenter Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0d8b782b63fb63..c6c98ee87ec91f 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -28,8 +28,8 @@ static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, inode->bi_dir_offset == d.k->p.offset; } -static bool dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, - struct bch_inode_unpacked *inode) +static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, + struct bch_inode_unpacked *inode) { if (d.v->d_type == DT_SUBVOL ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol From 3b1425a4eb4e9750db8620c26e39390411eea185 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Oct 2024 21:31:31 -0400 Subject: [PATCH 14/33] bcachefs: Fix bch2_inode_is_open() check Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index c6c98ee87ec91f..351de61c7ed120 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1106,7 +1106,7 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; } else { - if (fsck_err_on(bch2_inode_is_open(c, k.k->p), + if (fsck_err_on(!bch2_inode_is_open(c, k.k->p), trans, inode_unlinked_and_not_open, "inode %llu%u unlinked and not open", u.bi_inum, u.bi_snapshot)) { From d28786606a51620df7b7a3e7231338d9bc081656 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Oct 2024 21:35:38 -0400 Subject: [PATCH 15/33] bcachefs: Fix trans_commit disk accounting revert We only are applying JSET_ENTRY_TYPE_write_buffer_keys, revert path was missed. Fixes: a3581ca35d2b ("bcachefs: Fix BCH_TRANS_COMMIT_skip_accounting_apply") Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 1a74a1a252ee23..9bf471fa436140 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -832,7 +832,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, for (struct jset_entry *entry2 = trans->journal_entries; entry2 != entry; entry2 = vstruct_next(entry2)) - if (jset_entry_is_key(entry2) && entry2->start->k.type == KEY_TYPE_accounting) { + if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys && + entry2->start->k.type == KEY_TYPE_accounting) { struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); From c5e3cdbf2afedef77b64229fd0aed693abf0a0c4 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Oct 2024 16:43:39 -0400 Subject: [PATCH 16/33] tomoyo: revert CONFIG_SECURITY_TOMOYO_LKM support This patch reverts two TOMOYO patches that were merged into Linus' tree during the v6.12 merge window: 8b985bbfabbe ("tomoyo: allow building as a loadable LSM module") 268225a1de1a ("tomoyo: preparation step for building as a loadable LSM module") Together these two patches introduced the CONFIG_SECURITY_TOMOYO_LKM Kconfig build option which enabled a TOMOYO specific dynamic LSM loading mechanism (see the original commits for more details). Unfortunately, this approach was widely rejected by the LSM community as well as some members of the general kernel community. Objections included concerns over setting a bad precedent regarding individual LSMs managing their LSM callback registrations as well as general kernel symbol exporting practices. With little to no support for the CONFIG_SECURITY_TOMOYO_LKM approach outside of Tetsuo, and multiple objections, we need to revert these changes. Link: https://lore.kernel.org/all/0c4b443a-9c72-4800-97e8-a3816b6a9ae2@I-love.SAKURA.ne.jp Link: https://lore.kernel.org/all/CAHC9VhR=QjdoHG3wJgHFJkKYBg7vkQH2MpffgVzQ0tAByo_wRg@mail.gmail.com Acked-by: John Johansen Signed-off-by: Paul Moore --- security/tomoyo/Kconfig | 15 -- security/tomoyo/Makefile | 8 +- security/tomoyo/common.c | 14 +- security/tomoyo/common.h | 72 ----- security/tomoyo/gc.c | 3 - security/tomoyo/init.c | 366 -------------------------- security/tomoyo/load_policy.c | 12 - security/tomoyo/proxy.c | 82 ------ security/tomoyo/securityfs_if.c | 10 +- security/tomoyo/{hooks.h => tomoyo.c} | 110 +++++++- security/tomoyo/util.c | 3 + 11 files changed, 118 insertions(+), 577 deletions(-) delete mode 100644 security/tomoyo/init.c delete mode 100644 security/tomoyo/proxy.c rename security/tomoyo/{hooks.h => tomoyo.c} (79%) diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig index 90eccc6cd46492..1e0dd1a6d0b0e9 100644 --- a/security/tomoyo/Kconfig +++ b/security/tomoyo/Kconfig @@ -13,21 +13,6 @@ config SECURITY_TOMOYO found at . If you are unsure how to answer this question, answer N. -config SECURITY_TOMOYO_LKM - bool "Cut out most of TOMOYO's code to a loadable kernel module" - default n - depends on SECURITY_TOMOYO - depends on MODULES - help - Say Y here if you want to include TOMOYO without bloating - vmlinux file. If you say Y, most of TOMOYO code is cut out to - a loadable kernel module named tomoyo.ko . This option will be - useful for kernels built by Linux distributors where TOMOYO is - included but TOMOYO is not enabled by default. Please be sure - to explicitly load tomoyo.ko if you want to activate TOMOYO - without calling userspace policy loader, for tomoyo.ko is - loaded immediately before calling userspace policy loader. - config SECURITY_TOMOYO_MAX_ACCEPT_ENTRY int "Default maximal count for learning mode" default 2048 diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile index 287a7d16fa150c..55c67b9846a932 100644 --- a/security/tomoyo/Makefile +++ b/security/tomoyo/Makefile @@ -1,11 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -tomoyo-objs := audit.o common.o condition.o domain.o environ.o file.o gc.o group.o memory.o mount.o network.o proxy.o realpath.o securityfs_if.o util.o -obj-y += init.o load_policy.o -ifdef CONFIG_SECURITY_TOMOYO_LKM -obj-m += tomoyo.o -else -obj-y += tomoyo.o -endif +obj-y = audit.o common.o condition.o domain.o environ.o file.o gc.o group.o load_policy.o memory.o mount.o network.o realpath.o securityfs_if.o tomoyo.o util.o targets += builtin-policy.h diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index c0ef014f8009aa..5c7b059a332aac 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -998,13 +998,8 @@ static bool tomoyo_select_domain(struct tomoyo_io_buffer *head, p = find_task_by_pid_ns(pid, &init_pid_ns); else p = find_task_by_vpid(pid); - if (p) { + if (p) domain = tomoyo_task(p)->domain_info; -#ifdef CONFIG_SECURITY_TOMOYO_LKM - if (!domain) - domain = &tomoyo_kernel_domain; -#endif - } rcu_read_unlock(); } else if (!strncmp(data, "domain=", 7)) { if (tomoyo_domain_def(data + 7)) @@ -1715,13 +1710,8 @@ static void tomoyo_read_pid(struct tomoyo_io_buffer *head) p = find_task_by_pid_ns(pid, &init_pid_ns); else p = find_task_by_vpid(pid); - if (p) { + if (p) domain = tomoyo_task(p)->domain_info; -#ifdef CONFIG_SECURITY_TOMOYO_LKM - if (!domain) - domain = &tomoyo_kernel_domain; -#endif - } rcu_read_unlock(); if (!domain) return; diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 4f6c52a9f47801..0e8e2e959aef4e 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -978,7 +978,6 @@ int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile, int tomoyo_init_request_info(struct tomoyo_request_info *r, struct tomoyo_domain_info *domain, const u8 index); -int __init tomoyo_interface_init(void); int tomoyo_mkdev_perm(const u8 operation, const struct path *path, const unsigned int mode, unsigned int dev); int tomoyo_mount_permission(const char *dev_name, const struct path *path, @@ -1215,14 +1214,10 @@ static inline void tomoyo_put_group(struct tomoyo_group *group) * * Returns pointer to "struct tomoyo_task" for specified thread. */ -#ifdef CONFIG_SECURITY_TOMOYO_LKM -extern struct tomoyo_task *tomoyo_task(struct task_struct *task); -#else static inline struct tomoyo_task *tomoyo_task(struct task_struct *task) { return task->security + tomoyo_blob_sizes.lbs_task; } -#endif /** * tomoyo_same_name_union - Check for duplicated "struct tomoyo_name_union" entry. @@ -1289,71 +1284,4 @@ static inline struct tomoyo_policy_namespace *tomoyo_current_namespace(void) pos = srcu_dereference((head)->next, &tomoyo_ss); \ for ( ; pos != (head); pos = srcu_dereference(pos->next, &tomoyo_ss)) -#ifdef CONFIG_SECURITY_TOMOYO_LKM - -#define LSM_HOOK(RET, DEFAULT, NAME, ...) typedef RET (NAME##_t)(__VA_ARGS__); -#include -#undef LSM_HOOK - -struct tomoyo_hooks { - cred_prepare_t *cred_prepare; - bprm_committed_creds_t *bprm_committed_creds; - task_alloc_t *task_alloc; - task_free_t *task_free; - bprm_check_security_t *bprm_check_security; - file_fcntl_t *file_fcntl; - file_open_t *file_open; - file_truncate_t *file_truncate; - path_truncate_t *path_truncate; - path_unlink_t *path_unlink; - path_mkdir_t *path_mkdir; - path_rmdir_t *path_rmdir; - path_symlink_t *path_symlink; - path_mknod_t *path_mknod; - path_link_t *path_link; - path_rename_t *path_rename; - inode_getattr_t *inode_getattr; - file_ioctl_t *file_ioctl; - file_ioctl_compat_t *file_ioctl_compat; - path_chmod_t *path_chmod; - path_chown_t *path_chown; - path_chroot_t *path_chroot; - sb_mount_t *sb_mount; - sb_umount_t *sb_umount; - sb_pivotroot_t *sb_pivotroot; - socket_bind_t *socket_bind; - socket_connect_t *socket_connect; - socket_listen_t *socket_listen; - socket_sendmsg_t *socket_sendmsg; -}; - -extern void tomoyo_register_hooks(const struct tomoyo_hooks *tomoyo_hooks); - -struct tomoyo_operations { - void (*check_profile)(void); - int enabled; -}; - -extern struct tomoyo_operations tomoyo_ops; - -/* - * Temporary hack: functions needed by tomoyo.ko . This will be removed - * after all functions are marked as EXPORT_STMBOL_GPL(). - */ -struct tomoyo_tmp_exports { - struct task_struct * (*find_task_by_vpid)(pid_t nr); - struct task_struct * (*find_task_by_pid_ns)(pid_t nr, struct pid_namespace *ns); - void (*put_filesystem)(struct file_system_type *fs); - struct file * (*get_mm_exe_file)(struct mm_struct *mm); - char * (*d_absolute_path)(const struct path *path, char *buf, int buflen); -}; -extern const struct tomoyo_tmp_exports tomoyo_tmp_exports; -#define find_task_by_vpid tomoyo_tmp_exports.find_task_by_vpid -#define find_task_by_pid_ns tomoyo_tmp_exports.find_task_by_pid_ns -#define put_filesystem tomoyo_tmp_exports.put_filesystem -#define get_mm_exe_file tomoyo_tmp_exports.get_mm_exe_file -#define d_absolute_path tomoyo_tmp_exports.d_absolute_path - -#endif /* defined(CONFIG_SECURITY_TOMOYO_LKM) */ - #endif /* !defined(_SECURITY_TOMOYO_COMMON_H) */ diff --git a/security/tomoyo/gc.c b/security/tomoyo/gc.c index 6eccca15083987..026e29ea3796c3 100644 --- a/security/tomoyo/gc.c +++ b/security/tomoyo/gc.c @@ -9,9 +9,6 @@ #include #include -/* Lock for GC. */ -DEFINE_SRCU(tomoyo_ss); - /** * tomoyo_memory_free - Free memory for elements. * diff --git a/security/tomoyo/init.c b/security/tomoyo/init.c deleted file mode 100644 index 034e7db22d4ec3..00000000000000 --- a/security/tomoyo/init.c +++ /dev/null @@ -1,366 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * security/tomoyo/init.c - * - * Copyright (C) 2005-2011 NTT DATA CORPORATION - */ - -#include -#include -#include "common.h" - -#ifndef CONFIG_SECURITY_TOMOYO_LKM - -#include "hooks.h" - -#else - -#define DEFINE_STATIC_CALL_PROXY(NAME) \ - static NAME##_t tomoyo_##NAME; \ - DEFINE_STATIC_CALL_RET0(tomoyo_##NAME, tomoyo_##NAME); -DEFINE_STATIC_CALL_PROXY(cred_prepare) -DEFINE_STATIC_CALL_PROXY(bprm_committed_creds) -DEFINE_STATIC_CALL_PROXY(bprm_check_security) -DEFINE_STATIC_CALL_PROXY(inode_getattr) -DEFINE_STATIC_CALL_PROXY(path_truncate) -DEFINE_STATIC_CALL_PROXY(file_truncate) -DEFINE_STATIC_CALL_PROXY(path_unlink) -DEFINE_STATIC_CALL_PROXY(path_mkdir) -DEFINE_STATIC_CALL_PROXY(path_rmdir) -DEFINE_STATIC_CALL_PROXY(path_symlink) -DEFINE_STATIC_CALL_PROXY(path_mknod) -DEFINE_STATIC_CALL_PROXY(path_link) -DEFINE_STATIC_CALL_PROXY(path_rename) -DEFINE_STATIC_CALL_PROXY(file_fcntl) -DEFINE_STATIC_CALL_PROXY(file_open) -DEFINE_STATIC_CALL_PROXY(file_ioctl) -DEFINE_STATIC_CALL_PROXY(path_chmod) -DEFINE_STATIC_CALL_PROXY(path_chown) -DEFINE_STATIC_CALL_PROXY(path_chroot) -DEFINE_STATIC_CALL_PROXY(sb_mount) -DEFINE_STATIC_CALL_PROXY(sb_umount) -DEFINE_STATIC_CALL_PROXY(sb_pivotroot) -DEFINE_STATIC_CALL_PROXY(socket_listen) -DEFINE_STATIC_CALL_PROXY(socket_connect) -DEFINE_STATIC_CALL_PROXY(socket_bind) -DEFINE_STATIC_CALL_PROXY(socket_sendmsg) -DEFINE_STATIC_CALL_PROXY(task_alloc) -DEFINE_STATIC_CALL_PROXY(task_free) -#undef DEFINE_STATIC_CALL_PROXY - -static int tomoyo_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) -{ - return static_call(tomoyo_cred_prepare)(new, old, gfp); -} - -static void tomoyo_bprm_committed_creds(const struct linux_binprm *bprm) -{ - static_call(tomoyo_bprm_committed_creds)(bprm); -} - -static int tomoyo_bprm_check_security(struct linux_binprm *bprm) -{ - return static_call(tomoyo_bprm_check_security)(bprm); -} - -static int tomoyo_inode_getattr(const struct path *path) -{ - return static_call(tomoyo_inode_getattr)(path); -} - -static int tomoyo_path_truncate(const struct path *path) -{ - return static_call(tomoyo_path_truncate)(path); -} - -static int tomoyo_file_truncate(struct file *file) -{ - return static_call(tomoyo_file_truncate)(file); -} - -static int tomoyo_path_unlink(const struct path *parent, struct dentry *dentry) -{ - return static_call(tomoyo_path_unlink)(parent, dentry); -} - -static int tomoyo_path_mkdir(const struct path *parent, struct dentry *dentry, umode_t mode) -{ - return static_call(tomoyo_path_mkdir)(parent, dentry, mode); -} - -static int tomoyo_path_rmdir(const struct path *parent, struct dentry *dentry) -{ - return static_call(tomoyo_path_rmdir)(parent, dentry); -} - -static int tomoyo_path_symlink(const struct path *parent, struct dentry *dentry, - const char *old_name) -{ - return static_call(tomoyo_path_symlink)(parent, dentry, old_name); -} - -static int tomoyo_path_mknod(const struct path *parent, struct dentry *dentry, - umode_t mode, unsigned int dev) -{ - return static_call(tomoyo_path_mknod)(parent, dentry, mode, dev); -} - -static int tomoyo_path_link(struct dentry *old_dentry, const struct path *new_dir, - struct dentry *new_dentry) -{ - return static_call(tomoyo_path_link)(old_dentry, new_dir, new_dentry); -} - -static int tomoyo_path_rename(const struct path *old_parent, struct dentry *old_dentry, - const struct path *new_parent, struct dentry *new_dentry, - const unsigned int flags) -{ - return static_call(tomoyo_path_rename)(old_parent, old_dentry, new_parent, new_dentry, flags); -} - -static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) -{ - return static_call(tomoyo_file_fcntl)(file, cmd, arg); -} - -static int tomoyo_file_open(struct file *f) -{ - return static_call(tomoyo_file_open)(f); -} - -static int tomoyo_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - return static_call(tomoyo_file_ioctl)(file, cmd, arg); -} - -static int tomoyo_path_chmod(const struct path *path, umode_t mode) -{ - return static_call(tomoyo_path_chmod)(path, mode); -} - -static int tomoyo_path_chown(const struct path *path, kuid_t uid, kgid_t gid) -{ - return static_call(tomoyo_path_chown)(path, uid, gid); -} - -static int tomoyo_path_chroot(const struct path *path) -{ - return static_call(tomoyo_path_chroot)(path); -} - -static int tomoyo_sb_mount(const char *dev_name, const struct path *path, - const char *type, unsigned long flags, void *data) -{ - return static_call(tomoyo_sb_mount)(dev_name, path, type, flags, data); -} - -static int tomoyo_sb_umount(struct vfsmount *mnt, int flags) -{ - return static_call(tomoyo_sb_umount)(mnt, flags); -} - -static int tomoyo_sb_pivotroot(const struct path *old_path, const struct path *new_path) -{ - return static_call(tomoyo_sb_pivotroot)(old_path, new_path); -} - -static int tomoyo_socket_listen(struct socket *sock, int backlog) -{ - return static_call(tomoyo_socket_listen)(sock, backlog); -} - -static int tomoyo_socket_connect(struct socket *sock, struct sockaddr *addr, int addr_len) -{ - return static_call(tomoyo_socket_connect)(sock, addr, addr_len); -} - -static int tomoyo_socket_bind(struct socket *sock, struct sockaddr *addr, int addr_len) -{ - return static_call(tomoyo_socket_bind)(sock, addr, addr_len); -} - -static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) -{ - return static_call(tomoyo_socket_sendmsg)(sock, msg, size); -} - -static int tomoyo_task_alloc(struct task_struct *task, unsigned long clone_flags) -{ - return static_call(tomoyo_task_alloc)(task, clone_flags); -} - -static void tomoyo_task_free(struct task_struct *task) -{ - static_call(tomoyo_task_free)(task); -} - -void tomoyo_register_hooks(const struct tomoyo_hooks *tomoyo_hooks) -{ - static void *registered; - - if (cmpxchg(®istered, NULL, ®istered)) - panic("%s was called twice!\n", __func__); - static_call_update(tomoyo_task_free, tomoyo_hooks->task_free); - static_call_update(tomoyo_task_alloc, tomoyo_hooks->task_alloc); - static_call_update(tomoyo_cred_prepare, tomoyo_hooks->cred_prepare); - static_call_update(tomoyo_bprm_committed_creds, tomoyo_hooks->bprm_committed_creds); - static_call_update(tomoyo_bprm_check_security, tomoyo_hooks->bprm_check_security); - static_call_update(tomoyo_inode_getattr, tomoyo_hooks->inode_getattr); - static_call_update(tomoyo_path_truncate, tomoyo_hooks->path_truncate); - static_call_update(tomoyo_file_truncate, tomoyo_hooks->file_truncate); - static_call_update(tomoyo_path_unlink, tomoyo_hooks->path_unlink); - static_call_update(tomoyo_path_mkdir, tomoyo_hooks->path_mkdir); - static_call_update(tomoyo_path_rmdir, tomoyo_hooks->path_rmdir); - static_call_update(tomoyo_path_symlink, tomoyo_hooks->path_symlink); - static_call_update(tomoyo_path_mknod, tomoyo_hooks->path_mknod); - static_call_update(tomoyo_path_link, tomoyo_hooks->path_link); - static_call_update(tomoyo_path_rename, tomoyo_hooks->path_rename); - static_call_update(tomoyo_file_fcntl, tomoyo_hooks->file_fcntl); - static_call_update(tomoyo_file_open, tomoyo_hooks->file_open); - static_call_update(tomoyo_file_ioctl, tomoyo_hooks->file_ioctl); - static_call_update(tomoyo_path_chmod, tomoyo_hooks->path_chmod); - static_call_update(tomoyo_path_chown, tomoyo_hooks->path_chown); - static_call_update(tomoyo_path_chroot, tomoyo_hooks->path_chroot); - static_call_update(tomoyo_sb_mount, tomoyo_hooks->sb_mount); - static_call_update(tomoyo_sb_umount, tomoyo_hooks->sb_umount); - static_call_update(tomoyo_sb_pivotroot, tomoyo_hooks->sb_pivotroot); - static_call_update(tomoyo_socket_listen, tomoyo_hooks->socket_listen); - static_call_update(tomoyo_socket_connect, tomoyo_hooks->socket_connect); - static_call_update(tomoyo_socket_bind, tomoyo_hooks->socket_bind); - static_call_update(tomoyo_socket_sendmsg, tomoyo_hooks->socket_sendmsg); -} -EXPORT_SYMBOL_GPL(tomoyo_register_hooks); - -/* - * Temporary hack: functions needed by tomoyo.ko . This hack will be removed - * after all functions are marked as EXPORT_STMBOL_GPL(). - */ -#undef find_task_by_vpid -#undef find_task_by_pid_ns -#undef put_filesystem -#undef get_mm_exe_file -#undef d_absolute_path -const struct tomoyo_tmp_exports tomoyo_tmp_exports = { - .find_task_by_vpid = find_task_by_vpid, - .find_task_by_pid_ns = find_task_by_pid_ns, - .put_filesystem = put_filesystem, - .get_mm_exe_file = get_mm_exe_file, - .d_absolute_path = d_absolute_path, -}; -EXPORT_SYMBOL_GPL(tomoyo_tmp_exports); - -#endif - -#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER -static int tomoyo_bprm_creds_for_exec(struct linux_binprm *bprm) -{ - /* - * Load policy if /sbin/tomoyo-init exists and /sbin/init is requested - * for the first time. - */ - if (!tomoyo_policy_loaded) - tomoyo_load_policy(bprm->filename); - return 0; -} -#endif - -struct lsm_blob_sizes tomoyo_blob_sizes __ro_after_init = { - .lbs_task = sizeof(struct tomoyo_task), -}; - -static const struct lsm_id tomoyo_lsmid = { - .name = "tomoyo", - .id = LSM_ID_TOMOYO, -}; - -/* tomoyo_hooks is used for registering TOMOYO. */ -static struct security_hook_list tomoyo_hooks[] __ro_after_init = { - LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare), - LSM_HOOK_INIT(bprm_committed_creds, tomoyo_bprm_committed_creds), - LSM_HOOK_INIT(task_alloc, tomoyo_task_alloc), - LSM_HOOK_INIT(task_free, tomoyo_task_free), -#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER - LSM_HOOK_INIT(bprm_creds_for_exec, tomoyo_bprm_creds_for_exec), -#endif - LSM_HOOK_INIT(bprm_check_security, tomoyo_bprm_check_security), - LSM_HOOK_INIT(file_fcntl, tomoyo_file_fcntl), - LSM_HOOK_INIT(file_open, tomoyo_file_open), - LSM_HOOK_INIT(file_truncate, tomoyo_file_truncate), - LSM_HOOK_INIT(path_truncate, tomoyo_path_truncate), - LSM_HOOK_INIT(path_unlink, tomoyo_path_unlink), - LSM_HOOK_INIT(path_mkdir, tomoyo_path_mkdir), - LSM_HOOK_INIT(path_rmdir, tomoyo_path_rmdir), - LSM_HOOK_INIT(path_symlink, tomoyo_path_symlink), - LSM_HOOK_INIT(path_mknod, tomoyo_path_mknod), - LSM_HOOK_INIT(path_link, tomoyo_path_link), - LSM_HOOK_INIT(path_rename, tomoyo_path_rename), - LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr), - LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl), - LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl), - LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod), - LSM_HOOK_INIT(path_chown, tomoyo_path_chown), - LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot), - LSM_HOOK_INIT(sb_mount, tomoyo_sb_mount), - LSM_HOOK_INIT(sb_umount, tomoyo_sb_umount), - LSM_HOOK_INIT(sb_pivotroot, tomoyo_sb_pivotroot), - LSM_HOOK_INIT(socket_bind, tomoyo_socket_bind), - LSM_HOOK_INIT(socket_connect, tomoyo_socket_connect), - LSM_HOOK_INIT(socket_listen, tomoyo_socket_listen), - LSM_HOOK_INIT(socket_sendmsg, tomoyo_socket_sendmsg), -}; - -int tomoyo_enabled __ro_after_init = 1; - -/* Has /sbin/init started? */ -bool tomoyo_policy_loaded; - -#ifdef CONFIG_SECURITY_TOMOYO_LKM -EXPORT_SYMBOL_GPL(tomoyo_blob_sizes); -EXPORT_SYMBOL_GPL(tomoyo_policy_loaded); - -struct tomoyo_operations tomoyo_ops; -EXPORT_SYMBOL_GPL(tomoyo_ops); - -/** - * tomoyo_init - Reserve hooks for TOMOYO Linux. - * - * Returns 0. - */ -static int __init tomoyo_init(void) -{ - /* register ourselves with the security framework */ - security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), &tomoyo_lsmid); - tomoyo_ops.enabled = tomoyo_enabled; - pr_info("Hooks for initializing TOMOYO Linux are ready\n"); - return 0; -} -#else -/** - * tomoyo_init - Register TOMOYO Linux as a LSM module. - * - * Returns 0. - */ -static int __init tomoyo_init(void) -{ - struct tomoyo_task *s = tomoyo_task(current); - - /* register ourselves with the security framework */ - security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), - &tomoyo_lsmid); - pr_info("TOMOYO Linux initialized\n"); - s->domain_info = &tomoyo_kernel_domain; - atomic_inc(&tomoyo_kernel_domain.users); - s->old_domain_info = NULL; - tomoyo_mm_init(); - - return 0; -} -#endif - -DEFINE_LSM(tomoyo) = { - .name = "tomoyo", - .enabled = &tomoyo_enabled, - .flags = LSM_FLAG_LEGACY_MAJOR, - .blobs = &tomoyo_blob_sizes, - .init = tomoyo_init, -}; diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c index 6a2a72354a6418..363b65be87ab76 100644 --- a/security/tomoyo/load_policy.c +++ b/security/tomoyo/load_policy.c @@ -97,14 +97,6 @@ void tomoyo_load_policy(const char *filename) if (!tomoyo_policy_loader_exists()) return; done = true; -#ifdef CONFIG_SECURITY_TOMOYO_LKM - /* Load tomoyo.ko if not yet loaded. */ - if (!tomoyo_ops.check_profile) - request_module("tomoyo"); - /* Check if tomoyo.ko was successfully loaded. */ - if (!tomoyo_ops.check_profile) - panic("Failed to load tomoyo module."); -#endif pr_info("Calling %s to load policy. Please wait.\n", tomoyo_loader); argv[0] = (char *) tomoyo_loader; argv[1] = NULL; @@ -112,11 +104,7 @@ void tomoyo_load_policy(const char *filename) envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[2] = NULL; call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); -#ifdef CONFIG_SECURITY_TOMOYO_LKM - tomoyo_ops.check_profile(); -#else tomoyo_check_profile(); -#endif } #endif diff --git a/security/tomoyo/proxy.c b/security/tomoyo/proxy.c deleted file mode 100644 index 1618cc0f2af8e3..00000000000000 --- a/security/tomoyo/proxy.c +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * security/tomoyo/proxy.c - * - * Copyright (C) 2005-2011 NTT DATA CORPORATION - */ - -#include -#include "common.h" - -#ifdef CONFIG_SECURITY_TOMOYO_LKM - -struct tomoyo_task *tomoyo_task(struct task_struct *task) -{ - struct tomoyo_task *s = task->security + tomoyo_blob_sizes.lbs_task; - - if (unlikely(!s->domain_info)) { - if (likely(task == current)) { - s->domain_info = &tomoyo_kernel_domain; - atomic_inc(&tomoyo_kernel_domain.users); - } else { - /* Caller handles s->domain_info == NULL case. */ - } - } - return s; -} - -#include "hooks.h" - -/** - * tomoyo_runtime_init - Register TOMOYO Linux as a loadable LSM module. - * - * Returns 0 if TOMOYO is enabled, -EINVAL otherwise. - */ -static int __init tomoyo_runtime_init(void) -{ - const struct tomoyo_hooks tomoyo_hooks = { - .cred_prepare = tomoyo_cred_prepare, - .bprm_committed_creds = tomoyo_bprm_committed_creds, - .task_alloc = tomoyo_task_alloc, - .task_free = tomoyo_task_free, - .bprm_check_security = tomoyo_bprm_check_security, - .file_fcntl = tomoyo_file_fcntl, - .file_open = tomoyo_file_open, - .file_truncate = tomoyo_file_truncate, - .path_truncate = tomoyo_path_truncate, - .path_unlink = tomoyo_path_unlink, - .path_mkdir = tomoyo_path_mkdir, - .path_rmdir = tomoyo_path_rmdir, - .path_symlink = tomoyo_path_symlink, - .path_mknod = tomoyo_path_mknod, - .path_link = tomoyo_path_link, - .path_rename = tomoyo_path_rename, - .inode_getattr = tomoyo_inode_getattr, - .file_ioctl = tomoyo_file_ioctl, - .file_ioctl_compat = tomoyo_file_ioctl, - .path_chmod = tomoyo_path_chmod, - .path_chown = tomoyo_path_chown, - .path_chroot = tomoyo_path_chroot, - .sb_mount = tomoyo_sb_mount, - .sb_umount = tomoyo_sb_umount, - .sb_pivotroot = tomoyo_sb_pivotroot, - .socket_bind = tomoyo_socket_bind, - .socket_connect = tomoyo_socket_connect, - .socket_listen = tomoyo_socket_listen, - .socket_sendmsg = tomoyo_socket_sendmsg, - }; - - if (!tomoyo_ops.enabled) - return -EINVAL; - tomoyo_ops.check_profile = tomoyo_check_profile; - pr_info("TOMOYO Linux initialized\n"); - tomoyo_task(current); - tomoyo_mm_init(); - tomoyo_interface_init(); - tomoyo_register_hooks(&tomoyo_hooks); - return 0; -} -module_init(tomoyo_runtime_init); -MODULE_LICENSE("GPL"); - -#endif diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c index a3b821b7f4771a..a2705798476f9a 100644 --- a/security/tomoyo/securityfs_if.c +++ b/security/tomoyo/securityfs_if.c @@ -229,19 +229,17 @@ static void __init tomoyo_create_entry(const char *name, const umode_t mode, } /** - * tomoyo_interface_init - Initialize /sys/kernel/security/tomoyo/ interface. + * tomoyo_initerface_init - Initialize /sys/kernel/security/tomoyo/ interface. * * Returns 0. */ -int __init tomoyo_interface_init(void) +static int __init tomoyo_initerface_init(void) { struct tomoyo_domain_info *domain; struct dentry *tomoyo_dir; -#ifndef CONFIG_SECURITY_TOMOYO_LKM if (!tomoyo_enabled) return 0; -#endif domain = tomoyo_domain(); /* Don't create securityfs entries unless registered. */ if (domain != &tomoyo_kernel_domain) @@ -272,6 +270,4 @@ int __init tomoyo_interface_init(void) return 0; } -#ifndef CONFIG_SECURITY_TOMOYO_LKM -fs_initcall(tomoyo_interface_init); -#endif +fs_initcall(tomoyo_initerface_init); diff --git a/security/tomoyo/hooks.h b/security/tomoyo/tomoyo.c similarity index 79% rename from security/tomoyo/hooks.h rename to security/tomoyo/tomoyo.c index 58929bb71477dd..04a92c3d65d44d 100644 --- a/security/tomoyo/hooks.h +++ b/security/tomoyo/tomoyo.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 /* - * security/tomoyo/hooks.h + * security/tomoyo/tomoyo.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ +#include +#include #include "common.h" /** @@ -16,6 +18,10 @@ struct tomoyo_domain_info *tomoyo_domain(void) { struct tomoyo_task *s = tomoyo_task(current); + if (s->old_domain_info && !current->in_execve) { + atomic_dec(&s->old_domain_info->users); + s->old_domain_info = NULL; + } return s->domain_info; } @@ -56,6 +62,26 @@ static void tomoyo_bprm_committed_creds(const struct linux_binprm *bprm) s->old_domain_info = NULL; } +#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER +/** + * tomoyo_bprm_creds_for_exec - Target for security_bprm_creds_for_exec(). + * + * @bprm: Pointer to "struct linux_binprm". + * + * Returns 0. + */ +static int tomoyo_bprm_creds_for_exec(struct linux_binprm *bprm) +{ + /* + * Load policy if /sbin/tomoyo-init exists and /sbin/init is requested + * for the first time. + */ + if (!tomoyo_policy_loaded) + tomoyo_load_policy(bprm->filename); + return 0; +} +#endif + /** * tomoyo_bprm_check_security - Target for security_bprm_check(). * @@ -475,6 +501,10 @@ static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg, return tomoyo_socket_sendmsg_permission(sock, msg, size); } +struct lsm_blob_sizes tomoyo_blob_sizes __ro_after_init = { + .lbs_task = sizeof(struct tomoyo_task), +}; + /** * tomoyo_task_alloc - Target for security_task_alloc(). * @@ -513,3 +543,81 @@ static void tomoyo_task_free(struct task_struct *task) s->old_domain_info = NULL; } } + +static const struct lsm_id tomoyo_lsmid = { + .name = "tomoyo", + .id = LSM_ID_TOMOYO, +}; + +/* + * tomoyo_security_ops is a "struct security_operations" which is used for + * registering TOMOYO. + */ +static struct security_hook_list tomoyo_hooks[] __ro_after_init = { + LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare), + LSM_HOOK_INIT(bprm_committed_creds, tomoyo_bprm_committed_creds), + LSM_HOOK_INIT(task_alloc, tomoyo_task_alloc), + LSM_HOOK_INIT(task_free, tomoyo_task_free), +#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER + LSM_HOOK_INIT(bprm_creds_for_exec, tomoyo_bprm_creds_for_exec), +#endif + LSM_HOOK_INIT(bprm_check_security, tomoyo_bprm_check_security), + LSM_HOOK_INIT(file_fcntl, tomoyo_file_fcntl), + LSM_HOOK_INIT(file_open, tomoyo_file_open), + LSM_HOOK_INIT(file_truncate, tomoyo_file_truncate), + LSM_HOOK_INIT(path_truncate, tomoyo_path_truncate), + LSM_HOOK_INIT(path_unlink, tomoyo_path_unlink), + LSM_HOOK_INIT(path_mkdir, tomoyo_path_mkdir), + LSM_HOOK_INIT(path_rmdir, tomoyo_path_rmdir), + LSM_HOOK_INIT(path_symlink, tomoyo_path_symlink), + LSM_HOOK_INIT(path_mknod, tomoyo_path_mknod), + LSM_HOOK_INIT(path_link, tomoyo_path_link), + LSM_HOOK_INIT(path_rename, tomoyo_path_rename), + LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr), + LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl), + LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod), + LSM_HOOK_INIT(path_chown, tomoyo_path_chown), + LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot), + LSM_HOOK_INIT(sb_mount, tomoyo_sb_mount), + LSM_HOOK_INIT(sb_umount, tomoyo_sb_umount), + LSM_HOOK_INIT(sb_pivotroot, tomoyo_sb_pivotroot), + LSM_HOOK_INIT(socket_bind, tomoyo_socket_bind), + LSM_HOOK_INIT(socket_connect, tomoyo_socket_connect), + LSM_HOOK_INIT(socket_listen, tomoyo_socket_listen), + LSM_HOOK_INIT(socket_sendmsg, tomoyo_socket_sendmsg), +}; + +/* Lock for GC. */ +DEFINE_SRCU(tomoyo_ss); + +int tomoyo_enabled __ro_after_init = 1; + +/** + * tomoyo_init - Register TOMOYO Linux as a LSM module. + * + * Returns 0. + */ +static int __init tomoyo_init(void) +{ + struct tomoyo_task *s = tomoyo_task(current); + + /* register ourselves with the security framework */ + security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), + &tomoyo_lsmid); + pr_info("TOMOYO Linux initialized\n"); + s->domain_info = &tomoyo_kernel_domain; + atomic_inc(&tomoyo_kernel_domain.users); + s->old_domain_info = NULL; + tomoyo_mm_init(); + + return 0; +} + +DEFINE_LSM(tomoyo) = { + .name = "tomoyo", + .enabled = &tomoyo_enabled, + .flags = LSM_FLAG_LEGACY_MAJOR, + .blobs = &tomoyo_blob_sizes, + .init = tomoyo_init, +}; diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index b851ff3773829f..6799b1122c9d88 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -13,6 +13,9 @@ /* Lock for protecting policy. */ DEFINE_MUTEX(tomoyo_policy_lock); +/* Has /sbin/init started? */ +bool tomoyo_policy_loaded; + /* * Mapping table from "enum tomoyo_mac_index" to * "enum tomoyo_mac_category_index". From faab35a0370fd6e0821c7a8dd213492946fc776f Mon Sep 17 00:00:00 2001 From: "Luis Henriques (SUSE)" Date: Mon, 23 Sep 2024 11:49:08 +0100 Subject: [PATCH 17/33] ext4: use handle to mark fc as ineligible in __track_dentry_update() Calling ext4_fc_mark_ineligible() with a NULL handle is racy and may result in a fast-commit being done before the filesystem is effectively marked as ineligible. This patch fixes the calls to this function in __track_dentry_update() by adding an extra parameter to the callback used in ext4_fc_track_template(). Suggested-by: Jan Kara Signed-off-by: Luis Henriques (SUSE) Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240923104909.18342-2-luis.henriques@linux.dev Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index eaa5f5b51f500b..b33664f6ce2abd 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -379,7 +379,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl */ static int ext4_fc_track_template( handle_t *handle, struct inode *inode, - int (*__fc_track_fn)(struct inode *, void *, bool), + int (*__fc_track_fn)(handle_t *handle, struct inode *, void *, bool), void *args, int enqueue) { bool update = false; @@ -396,7 +396,7 @@ static int ext4_fc_track_template( ext4_fc_reset_inode(inode); ei->i_sync_tid = tid; } - ret = __fc_track_fn(inode, args, update); + ret = __fc_track_fn(handle, inode, args, update); mutex_unlock(&ei->i_fc_lock); if (!enqueue) @@ -420,7 +420,8 @@ struct __track_dentry_update_args { }; /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */ -static int __track_dentry_update(struct inode *inode, void *arg, bool update) +static int __track_dentry_update(handle_t *handle, struct inode *inode, + void *arg, bool update) { struct ext4_fc_dentry_update *node; struct ext4_inode_info *ei = EXT4_I(inode); @@ -435,14 +436,14 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) if (IS_ENCRYPTED(dir)) { ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_ENCRYPTED_FILENAME, - NULL); + handle); mutex_lock(&ei->i_fc_lock); return -EOPNOTSUPP; } node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); if (!node) { - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, handle); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -454,7 +455,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); if (!node->fcd_name.name) { kmem_cache_free(ext4_fc_dentry_cachep, node); - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, handle); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -576,7 +577,8 @@ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) } /* __track_fn for inode tracking */ -static int __track_inode(struct inode *inode, void *arg, bool update) +static int __track_inode(handle_t *handle, struct inode *inode, void *arg, + bool update) { if (update) return -EEXIST; @@ -614,7 +616,8 @@ struct __track_range_args { }; /* __track_fn for tracking data updates */ -static int __track_range(struct inode *inode, void *arg, bool update) +static int __track_range(handle_t *handle, struct inode *inode, void *arg, + bool update) { struct ext4_inode_info *ei = EXT4_I(inode); ext4_lblk_t oldstart; From 04e6ce8f06d161399e5afde3df5dcfa9455b4952 Mon Sep 17 00:00:00 2001 From: "Luis Henriques (SUSE)" Date: Mon, 23 Sep 2024 11:49:09 +0100 Subject: [PATCH 18/33] ext4: mark fc as ineligible using an handle in ext4_xattr_set() Calling ext4_fc_mark_ineligible() with a NULL handle is racy and may result in a fast-commit being done before the filesystem is effectively marked as ineligible. This patch moves the call to this function so that an handle can be used. If a transaction fails to start, then there's not point in trying to mark the filesystem as ineligible, and an error will eventually be returned to user-space. Suggested-by: Jan Kara Signed-off-by: Luis Henriques (SUSE) Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240923104909.18342-3-luis.henriques@linux.dev Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/xattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e0e1956dcdd397..7647e9f6e1903a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2559,6 +2559,8 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name, error = ext4_xattr_set_handle(handle, inode, name_index, name, value, value_len, flags); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, + handle); error2 = ext4_journal_stop(handle); if (error == -ENOSPC && ext4_should_retry_alloc(sb, &retries)) @@ -2566,7 +2568,6 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name, if (error == 0) error = error2; } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL); return error; } From 6121258c2b33ceac3d21f6a221452692c465df88 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 27 Sep 2024 21:33:29 +0800 Subject: [PATCH 19/33] ext4: fix off by one issue in alloc_flex_gd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wesley reported an issue: ================================================================== EXT4-fs (dm-5): resizing filesystem from 7168 to 786432 blocks ------------[ cut here ]------------ kernel BUG at fs/ext4/resize.c:324! CPU: 9 UID: 0 PID: 3576 Comm: resize2fs Not tainted 6.11.0+ #27 RIP: 0010:ext4_resize_fs+0x1212/0x12d0 Call Trace: __ext4_ioctl+0x4e0/0x1800 ext4_ioctl+0x12/0x20 __x64_sys_ioctl+0x99/0xd0 x64_sys_call+0x1206/0x20d0 do_syscall_64+0x72/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e ================================================================== While reviewing the patch, Honza found that when adjusting resize_bg in alloc_flex_gd(), it was possible for flex_gd->resize_bg to be bigger than flexbg_size. The reproduction of the problem requires the following: o_group = flexbg_size * 2 * n; o_size = (o_group + 1) * group_size; n_group: [o_group + flexbg_size, o_group + flexbg_size * 2) o_size = (n_group + 1) * group_size; Take n=0,flexbg_size=16 as an example: last:15 |o---------------|--------------n-| o_group:0 resize to n_group:30 The corresponding reproducer is: img=test.img rm -f $img truncate -s 600M $img mkfs.ext4 -F $img -b 1024 -G 16 8M dev=`losetup -f --show $img` mkdir -p /tmp/test mount $dev /tmp/test resize2fs $dev 248M Delete the problematic plus 1 to fix the issue, and add a WARN_ON_ONCE() to prevent the issue from happening again. [ Note: another reproucer which this commit fixes is: img=test.img rm -f $img truncate -s 25MiB $img mkfs.ext4 -b 4096 -E nodiscard,lazy_itable_init=0,lazy_journal_init=0 $img truncate -s 3GiB $img dev=`losetup -f --show $img` mkdir -p /tmp/test mount $dev /tmp/test resize2fs $dev 3G umount $dev losetup -d $dev -- TYT ] Reported-by: Wesley Hershberger Closes: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2081231 Reported-by: Stéphane Graber Closes: https://lore.kernel.org/all/20240925143325.518508-1-aleksandr.mikhalitsyn@canonical.com/ Tested-by: Alexander Mikhalitsyn Tested-by: Eric Sandeen Fixes: 665d3e0af4d3 ("ext4: reduce unnecessary memory allocation in alloc_flex_gd()") Cc: stable@vger.kernel.org Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240927133329.1015041-1-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/resize.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e04eb08b90601e..a2704f06436106 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -230,8 +230,8 @@ struct ext4_new_flex_group_data { #define MAX_RESIZE_BG 16384 /* - * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of - * @flexbg_size. + * alloc_flex_gd() allocates an ext4_new_flex_group_data that satisfies the + * resizing from @o_group to @n_group, its size is typically @flexbg_size. * * Returns NULL on failure otherwise address of the allocated structure. */ @@ -239,25 +239,27 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size, ext4_group_t o_group, ext4_group_t n_group) { ext4_group_t last_group; + unsigned int max_resize_bg; struct ext4_new_flex_group_data *flex_gd; flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); if (flex_gd == NULL) goto out3; - if (unlikely(flexbg_size > MAX_RESIZE_BG)) - flex_gd->resize_bg = MAX_RESIZE_BG; - else - flex_gd->resize_bg = flexbg_size; + max_resize_bg = umin(flexbg_size, MAX_RESIZE_BG); + flex_gd->resize_bg = max_resize_bg; /* Avoid allocating large 'groups' array if not needed */ last_group = o_group | (flex_gd->resize_bg - 1); if (n_group <= last_group) - flex_gd->resize_bg = 1 << fls(n_group - o_group + 1); + flex_gd->resize_bg = 1 << fls(n_group - o_group); else if (n_group - last_group < flex_gd->resize_bg) - flex_gd->resize_bg = 1 << max(fls(last_group - o_group + 1), + flex_gd->resize_bg = 1 << max(fls(last_group - o_group), fls(n_group - last_group)); + if (WARN_ON_ONCE(flex_gd->resize_bg > max_resize_bg)) + flex_gd->resize_bg = max_resize_bg; + flex_gd->groups = kmalloc_array(flex_gd->resize_bg, sizeof(struct ext4_new_group_data), GFP_NOFS); From 6b63a948a73ba3df0fb3ab0c44807df344bc5bbf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 4 Oct 2024 19:44:32 -0400 Subject: [PATCH 20/33] bcachefs: Add missing wakeup to bch2_inode_hash_remove() This fixes two different bugs: - Looser locking with the rhashtable means we need to recheck if the inode is still hashed after prepare_to_wait(), and add a corresponding wakeup after removing from the hash table. - da18ecbf0fb6 ("fs: add i_state helpers") changed the bit waitqueues used for inodes, and bcachefs wasn't updated and thus broke; this updates bcachefs to the new helper. Fixes: 112d21fd1a12 ("bcachefs: switch to rhashtable for vfs inodes hash") Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4a1bb07a257448..5bfc26d5827017 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -174,20 +174,24 @@ static const struct rhashtable_params bch2_vfs_inodes_params = { .automatic_shrinking = true, }; -static void __wait_on_freeing_inode(struct inode *inode) +struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) { - wait_queue_head_t *wq; - DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); - wq = bit_waitqueue(&inode->i_state, __I_NEW); - prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); - spin_unlock(&inode->i_lock); - schedule(); - finish_wait(wq, &wait.wq_entry); + return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); } -struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) +static void __wait_on_freeing_inode(struct bch_fs *c, + struct bch_inode_info *inode, + subvol_inum inum) { - return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); + wait_queue_head_t *wq; + DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); + wq = inode_bit_waitqueue(&wait, &inode->v, __I_NEW); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->v.i_lock); + + if (__bch2_inode_hash_find(c, inum) == inode) + schedule_timeout(HZ * 10); + finish_wait(wq, &wait.wq_entry); } static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans, @@ -204,10 +208,10 @@ static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btre } if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) { if (!trans) { - __wait_on_freeing_inode(&inode->v); + __wait_on_freeing_inode(c, inode, inum); } else { bch2_trans_unlock(trans); - __wait_on_freeing_inode(&inode->v); + __wait_on_freeing_inode(c, inode, inum); int ret = bch2_trans_relock(trans); if (ret) return ERR_PTR(ret); @@ -232,6 +236,11 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod &inode->hash, bch2_vfs_inodes_params); BUG_ON(ret); inode->v.i_hash.pprev = NULL; + /* + * This pairs with the bch2_inode_hash_find() -> + * __wait_on_freeing_inode() path + */ + inode_wake_up_bit(&inode->v, __I_NEW); } } From 20826fe6b810bce3efba9ef5d74cf13ebe5f23d9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 02:44:12 -0400 Subject: [PATCH 21/33] bcachefs: Fix reattach_inode() Ensure a copy of the lost+found inode exists in the snapshot that we're reattaching, so that we don't trigger warnings in lookup_inode_for_snapshot() later. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 351de61c7ed120..881ad5b9447fd8 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -354,13 +354,12 @@ static int reattach_inode(struct btree_trans *trans, if (ret) return ret; - if (S_ISDIR(inode->bi_mode)) { - lostfound.bi_nlink++; + lostfound.bi_nlink += S_ISDIR(inode->bi_mode); - ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX); - if (ret) - return ret; - } + /* ensure lost+found inode is also present in inode snapshot */ + ret = __bch2_fsck_write_inode(trans, &lostfound, inode_snapshot); + if (ret) + return ret; dir_hash = bch2_hash_info_init(c, &lostfound); From fda7b1ffdef75cc0f4d34255e88b5894e2ce75b1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 15:33:08 -0400 Subject: [PATCH 22/33] bcachefs: Create lost+found in correct snapshot Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 881ad5b9447fd8..f0d6696c4df616 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -283,11 +283,17 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, return ret; create_lostfound: + /* + * we always create lost+found in the root snapshot; we don't want + * different branches of the snapshot tree to have different lost+found + */ + snapshot = le32_to_cpu(st.root_snapshot); /* * XXX: we could have a nicer log message here if we had a nice way to * walk backpointers to print a path */ - bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot)); + bch_notice(c, "creating lost+found in subvol %llu snapshot %u", + root_inum.subvol, le32_to_cpu(st.root_snapshot)); u64 now = bch2_current_time(c); struct btree_iter lostfound_iter = { NULL }; From 658c82f41e8075e18b98b8705ed0cc34346f35c2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 4 Oct 2024 15:05:40 -0400 Subject: [PATCH 23/33] bcachefs: bkey errors are only AUTOFIX during read Newly generated keys, in the transaction commit path or write path, should not be AUTOFIX; those indicate bugs that we need to fail fast for. Fixes: 5612daafb764 ("bcachefs: Fix fsck warnings from bkey validation") Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 11 +++++++++-- fs/bcachefs/error.h | 9 +++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 3a16b535b6c324..aac1960321b770 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -430,10 +430,17 @@ int __bch2_fsck_err(struct bch_fs *c, int __bch2_bkey_fsck_err(struct bch_fs *c, struct bkey_s_c k, - enum bch_fsck_flags flags, + enum bch_validate_flags validate_flags, enum bch_sb_error_id err, const char *fmt, ...) { + if (validate_flags & BCH_VALIDATE_silent) + return -BCH_ERR_fsck_delete_bkey; + + unsigned fsck_flags = 0; + if (!(validate_flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) + fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; + struct printbuf buf = PRINTBUF; va_list args; @@ -445,7 +452,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, va_end(args); prt_str(&buf, ": delete?"); - int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); + int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf); printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 21ee7211b03e8d..6551ada926b607 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -167,10 +167,11 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define fsck_err_on(cond, c, _err_type, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) +enum bch_validate_flags; __printf(5, 6) int __bch2_bkey_fsck_err(struct bch_fs *, struct bkey_s_c, - enum bch_fsck_flags, + enum bch_validate_flags, enum bch_sb_error_id, const char *, ...); @@ -180,11 +181,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *, */ #define bkey_fsck_err(c, _err_type, _err_msg, ...) \ do { \ - if ((flags & BCH_VALIDATE_silent)) { \ - ret = -BCH_ERR_fsck_delete_bkey; \ - goto fsck_err; \ - } \ - int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX|FSCK_AUTOFIX,\ + int _ret = __bch2_bkey_fsck_err(c, k, flags, \ BCH_FSCK_ERR_##_err_type, \ _err_msg, ##__VA_ARGS__); \ if (_ret != -BCH_ERR_fsck_fix && \ From 492e24d7604a1b78c8af3c30984a0cffc17d6bdf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 16:26:02 -0400 Subject: [PATCH 24/33] bcachefs: Make sure we print error that causes fsck to bail out Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index aac1960321b770..7a79f695ba2e74 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -393,6 +393,14 @@ int __bch2_fsck_err(struct bch_fs *c, !(flags & FSCK_CAN_IGNORE))) ret = -BCH_ERR_fsck_errors_not_fixed; + bool exiting = + test_bit(BCH_FS_fsck_running, &c->flags) && + (ret != -BCH_ERR_fsck_fix && + ret != -BCH_ERR_fsck_ignore); + + if (exiting) + print = true; + if (print) { if (bch2_fs_stdio_redirect(c)) bch2_print(c, "%s\n", out->buf); @@ -400,9 +408,7 @@ int __bch2_fsck_err(struct bch_fs *c, bch2_print_string_as_lines(KERN_ERR, out->buf); } - if (test_bit(BCH_FS_fsck_running, &c->flags) && - (ret != -BCH_ERR_fsck_fix && - ret != -BCH_ERR_fsck_ignore)) + if (exiting) bch_err(c, "Unable to continue, halting"); else if (suppressing) bch_err(c, "Ratelimiting new instances of previous error"); From 1bea714c532abf101e939a90b8c920ef9205cfa3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 16:26:21 -0400 Subject: [PATCH 25/33] bcachefs: Mark more errors AUTOFIX Errors are getting marked as AUTOFIX once they've been (re)-tested and audited. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index ed5dca5e116172..6a63d24180cafb 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -129,20 +129,20 @@ enum bch_fsck_flags { x(freespace_key_wrong, 115, 0) \ x(freespace_hole_missing, 116, 0) \ x(bucket_gens_val_size_bad, 117, 0) \ - x(bucket_gens_key_wrong, 118, 0) \ - x(bucket_gens_hole_wrong, 119, 0) \ - x(bucket_gens_to_invalid_dev, 120, 0) \ - x(bucket_gens_to_invalid_buckets, 121, 0) \ - x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \ + x(bucket_gens_key_wrong, 118, FSCK_AUTOFIX) \ + x(bucket_gens_hole_wrong, 119, FSCK_AUTOFIX) \ + x(bucket_gens_to_invalid_dev, 120, FSCK_AUTOFIX) \ + x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \ + x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \ x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ x(need_discard_freespace_key_bad, 124, 0) \ x(backpointer_bucket_offset_wrong, 125, 0) \ x(backpointer_to_missing_device, 126, 0) \ x(backpointer_to_missing_alloc, 127, 0) \ x(backpointer_to_missing_ptr, 128, 0) \ - x(lru_entry_at_time_0, 129, 0) \ - x(lru_entry_to_invalid_bucket, 130, 0) \ - x(lru_entry_bad, 131, 0) \ + x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \ + x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \ + x(lru_entry_bad, 131, FSCK_AUTOFIX) \ x(btree_ptr_val_too_big, 132, 0) \ x(btree_ptr_v2_val_too_big, 133, 0) \ x(btree_ptr_has_non_ptr, 134, 0) \ @@ -158,9 +158,9 @@ enum bch_fsck_flags { x(ptr_after_last_bucket, 144, 0) \ x(ptr_before_first_bucket, 145, 0) \ x(ptr_spans_multiple_buckets, 146, 0) \ - x(ptr_to_missing_backpointer, 147, 0) \ - x(ptr_to_missing_alloc_key, 148, 0) \ - x(ptr_to_missing_replicas_entry, 149, 0) \ + x(ptr_to_missing_backpointer, 147, FSCK_AUTOFIX) \ + x(ptr_to_missing_alloc_key, 148, FSCK_AUTOFIX) \ + x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \ x(ptr_to_missing_stripe, 150, 0) \ x(ptr_to_incorrect_stripe, 151, 0) \ x(ptr_gen_newer_than_bucket_gen, 152, 0) \ @@ -194,7 +194,7 @@ enum bch_fsck_flags { x(snapshot_skiplist_not_normalized, 180, 0) \ x(snapshot_skiplist_bad, 181, 0) \ x(snapshot_should_not_have_subvol, 182, 0) \ - x(snapshot_to_bad_snapshot_tree, 183, 0) \ + x(snapshot_to_bad_snapshot_tree, 183, FSCK_AUTOFIX) \ x(snapshot_bad_depth, 184, 0) \ x(snapshot_bad_skiplist, 185, 0) \ x(subvol_pos_bad, 186, 0) \ From 01bf5e3bd26ff8e49bf06fa4180f3eab51ab06df Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 16:40:33 -0400 Subject: [PATCH 26/33] bcachefs: minor lru fsck fixes check_lru_key() wasn't using write buffer updates for deleting bad lru entries - dating from before the lru btree used the btree write buffer. And when possibly flushing the btree write buffer (to make sure we're seeing a real inconsistency), we need to be using the modern bch2_btree_write_buffer_maybe_flush(). Signed-off-by: Kent Overstreet --- fs/bcachefs/lru.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 96f2f4f8c39787..dd5df668131a0d 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "alloc_background.h" +#include "bkey_buf.h" #include "btree_iter.h" #include "btree_update.h" #include "btree_write_buffer.h" @@ -118,7 +119,7 @@ int bch2_lru_check_set(struct btree_trans *trans, static int bch2_check_lru_key(struct btree_trans *trans, struct btree_iter *lru_iter, struct bkey_s_c lru_k, - struct bpos *last_flushed_pos) + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -136,7 +137,7 @@ static int bch2_check_lru_key(struct btree_trans *trans, trans, lru_entry_to_invalid_bucket, "lru key points to nonexistent device:bucket %llu:%llu", alloc_pos.inode, alloc_pos.offset)) - return bch2_btree_delete_at(trans, lru_iter, 0); + return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0); ret = bkey_err(k); @@ -156,12 +157,9 @@ static int bch2_check_lru_key(struct btree_trans *trans, if (lru_k.k->type != KEY_TYPE_set || lru_pos_time(lru_k.k->p) != idx) { - if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) { - *last_flushed_pos = lru_k.k->p; - ret = bch2_btree_write_buffer_flush_sync(trans) ?: - -BCH_ERR_transaction_restart_write_buffer_flush; - goto out; - } + ret = bch2_btree_write_buffer_maybe_flush(trans, lru_k, last_flushed); + if (ret) + goto err; if (fsck_err(trans, lru_entry_bad, "incorrect lru entry: lru %s time %llu\n" @@ -171,9 +169,8 @@ static int bch2_check_lru_key(struct btree_trans *trans, lru_pos_time(lru_k.k->p), (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) - ret = bch2_btree_delete_at(trans, lru_iter, 0); + ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); } -out: err: fsck_err: bch2_trans_iter_exit(trans, &iter); @@ -184,12 +181,18 @@ static int bch2_check_lru_key(struct btree_trans *trans, int bch2_check_lrus(struct bch_fs *c) { - struct bpos last_flushed_pos = POS_MIN; + struct bkey_buf last_flushed; + + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, - bch2_check_lru_key(trans, &iter, k, &last_flushed_pos))); + bch2_check_lru_key(trans, &iter, k, &last_flushed))); + + bch2_bkey_buf_exit(&last_flushed, c); bch_err_fn(c, ret); return ret; From 260af1562ec14353824da24fe7acc179a902558e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 19:08:37 -0400 Subject: [PATCH 27/33] bcachefs: Kill alloc_v4.fragmentation_lru The fragmentation_lru field hasn't been needed since we reworked the LRU btrees to use the btree write buffer; previously it was used to resolve collisions, but the revised LRU btree uses the backpointer (the bucket) as part of the key. It should have been deleted at the time of the LRU rework; since it wasn't, that left places for bugs to hide, in check/repair. This fixes LRU fsck on a filesystem image helpfully provided by a user who disappeared before I could get his name for the reported-by. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 30 ++++++++++++++++++--------- fs/bcachefs/alloc_background_format.h | 2 +- fs/bcachefs/btree_gc.c | 3 --- fs/bcachefs/lru.c | 7 +++++-- fs/bcachefs/move.c | 2 +- fs/bcachefs/movinggc.c | 12 ++++++++--- fs/bcachefs/sb-errors_format.h | 4 ++-- 7 files changed, 38 insertions(+), 22 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 645b5ed4babb18..4e4a448f693166 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -332,7 +332,6 @@ void bch2_alloc_v4_swab(struct bkey_s k) a->io_time[1] = swab64(a->io_time[1]); a->stripe = swab32(a->stripe); a->nr_external_backpointers = swab32(a->nr_external_backpointers); - a->fragmentation_lru = swab64(a->fragmentation_lru); a->stripe_sectors = swab32(a->stripe_sectors); bps = alloc_v4_backpointers(a); @@ -347,6 +346,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c { struct bch_alloc_v4 _a; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); + struct bch_dev *ca = c ? bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL; prt_newline(out); printbuf_indent_add(out, 2); @@ -364,9 +364,13 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); - prt_printf(out, "fragmentation %llu\n", a->fragmentation_lru); + + if (ca) + prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca)); prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a)); printbuf_indent_sub(out, 2); + + bch2_dev_put(ca); } void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) @@ -882,12 +886,13 @@ int bch2_trigger_alloc(struct btree_trans *trans, goto err; } - new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, ca); - if (old_a->fragmentation_lru != new_a->fragmentation_lru) { + old_lru = alloc_lru_idx_fragmentation(*old_a, ca); + new_lru = alloc_lru_idx_fragmentation(*new_a, ca); + if (old_lru != new_lru) { ret = bch2_lru_change(trans, BCH_LRU_FRAGMENTATION_START, bucket_to_u64(new.k->p), - old_a->fragmentation_lru, new_a->fragmentation_lru); + old_lru, new_lru); if (ret) goto err; } @@ -1629,18 +1634,22 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, if (ret) return ret; + struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode); + if (!ca) + return 0; + a = bch2_alloc_to_v4(alloc_k, &a_convert); - if (a->fragmentation_lru) { + u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); + if (lru_idx) { ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START, - a->fragmentation_lru, - alloc_k, last_flushed); + lru_idx, alloc_k, last_flushed); if (ret) - return ret; + goto err; } if (a->data_type != BCH_DATA_cached) - return 0; + goto err; if (fsck_err_on(!a->io_time[READ], trans, alloc_key_cached_but_read_time_zero, @@ -1669,6 +1678,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, goto err; err: fsck_err: + bch2_dev_put(ca); printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h index f754a2951d8aab..befdaa95c515b8 100644 --- a/fs/bcachefs/alloc_background_format.h +++ b/fs/bcachefs/alloc_background_format.h @@ -70,7 +70,7 @@ struct bch_alloc_v4 { __u32 stripe; __u32 nr_external_backpointers; /* end of fields in original version of alloc_v4 */ - __u64 fragmentation_lru; + __u64 _fragmentation_lru; /* obsolete */ __u32 stripe_sectors; __u32 pad; } __packed __aligned(8); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 660d2fa02da21c..771154e3a2916d 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -828,8 +828,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, return ret; } - gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca); - if (fsck_err_on(new.data_type != gc.data_type, trans, alloc_key_data_type_wrong, "bucket %llu:%llu gen %u has wrong data_type" @@ -857,7 +855,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors); copy_bucket_field(alloc_key_stripe_wrong, stripe); copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy); - copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru); #undef copy_bucket_field if (!bch2_alloc_v4_cmp(*old, new)) diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index dd5df668131a0d..10857eccdeafe8 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -133,7 +133,9 @@ static int bch2_check_lru_key(struct btree_trans *trans, u64 idx; int ret; - if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), + struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_pos); + + if (fsck_err_on(!ca, trans, lru_entry_to_invalid_bucket, "lru key points to nonexistent device:bucket %llu:%llu", alloc_pos.inode, alloc_pos.offset)) @@ -151,7 +153,7 @@ static int bch2_check_lru_key(struct btree_trans *trans, idx = alloc_lru_idx_read(*a); break; case BCH_LRU_fragmentation: - idx = a->fragmentation_lru; + idx = alloc_lru_idx_fragmentation(*a, ca); break; } @@ -174,6 +176,7 @@ static int bch2_check_lru_key(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &iter); + bch2_dev_put(ca); printbuf_exit(&buf2); printbuf_exit(&buf1); return ret; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 7d3920e03742de..8c456d8b8b997e 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -692,7 +692,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, a = bch2_alloc_to_v4(k, &a_convert); dirty_sectors = bch2_bucket_sectors_dirty(*a); bucket_size = ca->mi.bucket_size; - fragmentation = a->fragmentation_lru; + fragmentation = alloc_lru_idx_fragmentation(*a, ca); ret = bch2_btree_write_buffer_tryflush(trans); bch_err_msg(c, ret, "flushing btree write buffer"); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index d86565bf07c8c5..d658be90f7378f 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -73,6 +73,7 @@ move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket b) static int bch2_bucket_is_movable(struct btree_trans *trans, struct move_bucket *b, u64 time) { + struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; struct bch_alloc_v4 _a; @@ -90,14 +91,19 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (ret) return ret; + struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode); + if (!ca) + goto out; + a = bch2_alloc_to_v4(k, &_a); b->k.gen = a->gen; b->sectors = bch2_bucket_sectors_dirty(*a); + u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); - ret = data_type_movable(a->data_type) && - a->fragmentation_lru && - a->fragmentation_lru <= time; + ret = lru_idx && lru_idx <= time; + bch2_dev_put(ca); +out: bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 6a63d24180cafb..b4024870b65e88 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -115,8 +115,8 @@ enum bch_fsck_flags { x(alloc_key_data_type_inconsistency, 101, 0) \ x(alloc_key_to_missing_dev_bucket, 102, 0) \ x(alloc_key_cached_inconsistency, 103, 0) \ - x(alloc_key_cached_but_read_time_zero, 104, 0) \ - x(alloc_key_to_missing_lru_entry, 105, 0) \ + x(alloc_key_cached_but_read_time_zero, 104, FSCK_AUTOFIX) \ + x(alloc_key_to_missing_lru_entry, 105, FSCK_AUTOFIX) \ x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \ x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \ x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \ From 1c6051bbd76b2767d6acef6a1d0bdf99aa319273 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 15:27:37 -0400 Subject: [PATCH 28/33] bcachefs: Check for directories with no backpointers It's legal for regular files to have missing backpointers (due to hardlinks), and fsck should automatically add them, but for directories this is an error that should be flagged. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 22 +++++++++++++++------- fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f0d6696c4df616..d2aa6a5bc973e6 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1826,6 +1826,21 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, if (inode_points_to_dirent(target, d)) return 0; + if (!target->bi_dir && + !target->bi_dir_offset) { + fsck_err_on(S_ISDIR(target->bi_mode), + trans, inode_dir_missing_backpointer, + "directory with missing backpointer\n%s", + (bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n "), + bch2_inode_unpacked_to_text(&buf, target), + buf.buf)); + + target->bi_dir = d.k->p.inode; + target->bi_dir_offset = d.k->p.offset; + return __bch2_fsck_write_inode(trans, target, target_snapshot); + } + if (bch2_inode_should_have_bp(target) && !fsck_err(trans, inode_wrong_backpointer, "dirent points to inode that does not point back:\n %s", @@ -1835,13 +1850,6 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, buf.buf))) goto err; - if (!target->bi_dir && - !target->bi_dir_offset) { - target->bi_dir = d.k->p.inode; - target->bi_dir_offset = d.k->p.offset; - return __bch2_fsck_write_inode(trans, target, target_snapshot); - } - struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); ret = bkey_err(bp_dirent); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index b4024870b65e88..fc3d31ed5975c8 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -219,6 +219,7 @@ enum bch_fsck_flags { x(inode_i_sectors_wrong, 204, FSCK_AUTOFIX) \ x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \ x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ + x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ @@ -295,7 +296,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 284, 0) + x(MAX, 285, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From c7da5ee2e5cc30faca49e3ea9dbecf8f6ee4f1ea Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Sep 2024 22:38:04 -0400 Subject: [PATCH 29/33] bcachefs: Check for unlinked inodes with dirents link count works differently in bcachefs - it's only nonzero for files with multiple hardlinks, which means we can also avoid checking it except for files that are known to have hardlinks. That means we need a few different checks instead; in particular, we don't want fsck to delet a file that has a dirent pointing to it. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 53 +++++++++++++++++++++++++--------- fs/bcachefs/sb-errors_format.h | 3 +- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index d2aa6a5bc973e6..09f890539ded4e 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1029,6 +1029,7 @@ static int check_inode(struct btree_trans *trans, bool full) { struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; struct bch_inode_unpacked u; bool do_update = false; int ret; @@ -1062,7 +1063,24 @@ static int check_inode(struct btree_trans *trans, trans, inode_snapshot_mismatch, "inodes in different snapshots don't match")) { bch_err(c, "repair not implemented yet"); - return -BCH_ERR_fsck_repair_unimplemented; + ret = -BCH_ERR_fsck_repair_unimplemented; + goto err_noprint; + } + + if (u.bi_dir || u.bi_dir_offset) { + ret = check_inode_dirent_inode(trans, &u, &do_update); + if (ret) + goto err; + } + + if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked), + trans, inode_unlinked_but_has_dirent, + "inode unlinked but has dirent\n%s", + (printbuf_reset(&buf), + bch2_inode_unpacked_to_text(&buf, &u), + buf.buf))) { + u.bi_flags &= ~BCH_INODE_unlinked; + do_update = true; } if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && @@ -1079,11 +1097,11 @@ static int check_inode(struct btree_trans *trans, bch_err_msg(c, ret, "in fsck updating inode"); if (ret) - return ret; + goto err_noprint; if (!bpos_eq(new_min_pos, POS_MIN)) bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); - return 0; + goto err_noprint; } if (u.bi_flags & BCH_INODE_unlinked) { @@ -1100,7 +1118,7 @@ static int check_inode(struct btree_trans *trans, */ ret = check_inode_deleted_list(trans, k.k->p); if (ret < 0) - return ret; + goto err_noprint; fsck_err_on(!ret, trans, unlinked_inode_not_on_deleted_list, @@ -1117,7 +1135,7 @@ static int check_inode(struct btree_trans *trans, u.bi_inum, u.bi_snapshot)) { ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck deleting inode"); - return ret; + goto err_noprint; } } } @@ -1182,12 +1200,6 @@ static int check_inode(struct btree_trans *trans, do_update = true; } - if (u.bi_dir || u.bi_dir_offset) { - ret = check_inode_dirent_inode(trans, &u, &do_update); - if (ret) - goto err; - } - if (fsck_err_on(u.bi_parent_subvol && (u.bi_subvol == 0 || u.bi_subvol == BCACHEFS_ROOT_SUBVOL), @@ -1232,11 +1244,13 @@ static int check_inode(struct btree_trans *trans, ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) - return ret; + goto err_noprint; } err: fsck_err: bch_err_fn(c, ret); +err_noprint: + printbuf_exit(&buf); return ret; } @@ -1831,11 +1845,22 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, fsck_err_on(S_ISDIR(target->bi_mode), trans, inode_dir_missing_backpointer, "directory with missing backpointer\n%s", - (bch2_bkey_val_to_text(&buf, c, d.s_c), - prt_printf(&buf, "\n "), + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n"), bch2_inode_unpacked_to_text(&buf, target), buf.buf)); + fsck_err_on(target->bi_flags & BCH_INODE_unlinked, + trans, inode_unlinked_but_has_dirent, + "inode unlinked but has dirent\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n"), + bch2_inode_unpacked_to_text(&buf, target), + buf.buf)); + + target->bi_flags &= ~BCH_INODE_unlinked; target->bi_dir = d.k->p.inode; target->bi_dir_offset = d.k->p.offset; return __bch2_fsck_write_inode(trans, target, target_snapshot); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index fc3d31ed5975c8..4c8ff4cc17c239 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -211,6 +211,7 @@ enum bch_fsck_flags { x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ x(inode_unlinked_and_not_open, 281, 0) \ + x(inode_unlinked_but_has_dirent, 285, 0) \ x(inode_checksum_type_invalid, 199, 0) \ x(inode_compression_type_invalid, 200, 0) \ x(inode_subvol_root_but_not_dir, 201, 0) \ @@ -296,7 +297,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 285, 0) + x(MAX, 286, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From c9306a91c3fdc9915f5408561ea432c70b03383b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Sep 2024 23:38:37 -0400 Subject: [PATCH 30/33] bcachefs: Check for unlinked, non-empty dirs in check_inode() We want to check for this early so it can be reattached if necessary in check_unreachable_inodes(); better than letting it be deleted and having the children reattached, losing their filenames. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 17 +++++++++++++++++ fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 09f890539ded4e..30eca76554f6dc 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1083,6 +1083,23 @@ static int check_inode(struct btree_trans *trans, do_update = true; } + if (S_ISDIR(u.bi_mode) && (u.bi_flags & BCH_INODE_unlinked)) { + /* Check for this early so that check_unreachable_inode() will reattach it */ + + ret = bch2_empty_dir_snapshot(trans, k.k->p.offset, 0, k.k->p.snapshot); + if (ret && ret != -BCH_ERR_ENOTEMPTY_dir_not_empty) + goto err; + + fsck_err_on(ret, trans, inode_dir_unlinked_but_not_empty, + "dir unlinked but not empty\n%s", + (printbuf_reset(&buf), + bch2_inode_unpacked_to_text(&buf, &u), + buf.buf)); + u.bi_flags &= ~BCH_INODE_unlinked; + do_update = true; + ret = 0; + } + if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { struct bpos new_min_pos; diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 4c8ff4cc17c239..4135b1ea2feccd 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -221,6 +221,7 @@ enum bch_fsck_flags { x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \ x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ + x(inode_dir_unlinked_but_not_empty, 286, FSCK_AUTOFIX) \ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ @@ -297,7 +298,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 286, 0) + x(MAX, 287, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From 72350ee0ea22c053f2683e50f1beba97df2ad053 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Sep 2024 00:00:33 -0400 Subject: [PATCH 31/33] bcachefs: Kill snapshot arg to fsck_write_inode() It was initially believed that it would be better to be explicit about the snapshot we're updating when writing inodes in fsck; however, it turns out that passing around the snapshot separately is more error prone and we're usually updating the inode in the same snapshow we read it from. This is different from normal filesystem paths, where we do the update in the snapshot of the subvolume we're in. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 87 ++++++++++++++++++++--------------------- fs/bcachefs/inode.c | 12 ++---- fs/bcachefs/inode.h | 4 +- fs/bcachefs/subvolume.c | 3 +- 4 files changed, 51 insertions(+), 55 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 30eca76554f6dc..b8a6ceb0cc7a3b 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -137,16 +137,15 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, return ret; } -static int lookup_inode(struct btree_trans *trans, u64 inode_nr, - struct bch_inode_unpacked *inode, - u32 *snapshot) +static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot, + struct bch_inode_unpacked *inode) { struct btree_iter iter; struct bkey_s_c k; int ret; k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, - SPOS(0, inode_nr, *snapshot), 0); + SPOS(0, inode_nr, snapshot), 0); ret = bkey_err(k); if (ret) goto err; @@ -154,8 +153,6 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr, ret = bkey_is_inode(k.k) ? bch2_inode_unpack(k, inode) : -BCH_ERR_ENOENT_inode; - if (!ret) - *snapshot = iter.pos.snapshot; err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -250,8 +247,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, struct bch_inode_unpacked root_inode; struct bch_hash_info root_hash_info; - u32 root_inode_snapshot = snapshot; - ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); + ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode); bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", root_inum.inum, le32_to_cpu(st.master_subvol)); if (ret) @@ -277,7 +273,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, * The bch2_check_dirents pass has already run, dangling dirents * shouldn't exist here: */ - ret = lookup_inode(trans, inum, lostfound, &snapshot); + ret = lookup_inode(trans, inum, snapshot, lostfound); bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); return ret; @@ -302,6 +298,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, bch2_inode_init_early(c, lostfound); bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); lostfound->bi_dir = root_inode.bi_inum; + lostfound->bi_snapshot = le32_to_cpu(st.root_snapshot); root_inode.bi_nlink++; @@ -329,9 +326,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, return ret; } -static int reattach_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 inode_snapshot) +static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { struct bch_fs *c = trans->c; struct bch_hash_info dir_hash; @@ -339,7 +334,7 @@ static int reattach_inode(struct btree_trans *trans, char name_buf[20]; struct qstr name; u64 dir_offset = 0; - u32 dirent_snapshot = inode_snapshot; + u32 dirent_snapshot = inode->bi_snapshot; int ret; if (inode->bi_subvol) { @@ -363,7 +358,12 @@ static int reattach_inode(struct btree_trans *trans, lostfound.bi_nlink += S_ISDIR(inode->bi_mode); /* ensure lost+found inode is also present in inode snapshot */ - ret = __bch2_fsck_write_inode(trans, &lostfound, inode_snapshot); + if (!inode->bi_subvol) { + BUG_ON(!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, lostfound.bi_snapshot)); + lostfound.bi_snapshot = inode->bi_snapshot; + } + + ret = __bch2_fsck_write_inode(trans, &lostfound); if (ret) return ret; @@ -388,7 +388,7 @@ static int reattach_inode(struct btree_trans *trans, inode->bi_dir = lostfound.bi_inum; inode->bi_dir_offset = dir_offset; - return __bch2_fsck_write_inode(trans, inode, inode_snapshot); + return __bch2_fsck_write_inode(trans, inode); } static int remove_backpointer(struct btree_trans *trans, @@ -427,7 +427,7 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume if (ret) return ret; - ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot)); + ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); return ret; } @@ -545,8 +545,9 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL); new_inode.bi_size = i_size; new_inode.bi_inum = inum; + new_inode.bi_snapshot = snapshot; - return __bch2_fsck_write_inode(trans, &new_inode, snapshot); + return __bch2_fsck_write_inode(trans, &new_inode); } struct snapshots_seen { @@ -1110,7 +1111,7 @@ static int check_inode(struct btree_trans *trans, u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; - ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) @@ -1258,7 +1259,7 @@ static int check_inode(struct btree_trans *trans, } do_update: if (do_update) { - ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) goto err_noprint; @@ -1383,7 +1384,7 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal w->last_pos.inode, i->snapshot, i->inode.bi_sectors, i->count)) { i->inode.bi_sectors = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); + ret = bch2_fsck_write_inode(trans, &i->inode); if (ret) break; } @@ -1825,7 +1826,7 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ "directory %llu:%u with wrong i_nlink: got %u, should be %llu", w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { i->inode.bi_nlink = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); + ret = bch2_fsck_write_inode(trans, &i->inode); if (ret) break; } @@ -1846,8 +1847,7 @@ noinline_for_stack static int check_dirent_inode_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d, - struct bch_inode_unpacked *target, - u32 target_snapshot) + struct bch_inode_unpacked *target) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; @@ -1880,7 +1880,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, target->bi_flags &= ~BCH_INODE_unlinked; target->bi_dir = d.k->p.inode; target->bi_dir_offset = d.k->p.offset; - return __bch2_fsck_write_inode(trans, target, target_snapshot); + return __bch2_fsck_write_inode(trans, target); } if (bch2_inode_should_have_bp(target) && @@ -1893,7 +1893,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, goto err; struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, - SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); + SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot)); ret = bkey_err(bp_dirent); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; @@ -1906,14 +1906,14 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, "inode %llu:%u has wrong backpointer:\n" "got %llu:%llu\n" "should be %llu:%llu", - target->bi_inum, target_snapshot, + target->bi_inum, target->bi_snapshot, target->bi_dir, target->bi_dir_offset, d.k->p.inode, d.k->p.offset)) { target->bi_dir = d.k->p.inode; target->bi_dir_offset = d.k->p.offset; - ret = __bch2_fsck_write_inode(trans, target, target_snapshot); + ret = __bch2_fsck_write_inode(trans, target); goto out; } @@ -1928,7 +1928,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, trans, inode_dir_multiple_links, "%s %llu:%u with multiple links\n%s", S_ISDIR(target->bi_mode) ? "directory" : "subvolume", - target->bi_inum, target_snapshot, buf.buf)) { + target->bi_inum, target->bi_snapshot, buf.buf)) { ret = __remove_dirent(trans, d.k->p); goto out; } @@ -1941,10 +1941,10 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, if (fsck_err_on(backpointer_exists && !target->bi_nlink, trans, inode_multiple_links_but_nlink_0, "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", - target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { + target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { target->bi_nlink++; target->bi_flags &= ~BCH_INODE_unlinked; - ret = __bch2_fsck_write_inode(trans, target, target_snapshot); + ret = __bch2_fsck_write_inode(trans, target); if (ret) goto err; } @@ -1961,15 +1961,14 @@ noinline_for_stack static int check_dirent_target(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d, - struct bch_inode_unpacked *target, - u32 target_snapshot) + struct bch_inode_unpacked *target) { struct bch_fs *c = trans->c; struct bkey_i_dirent *n; struct printbuf buf = PRINTBUF; int ret = 0; - ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); + ret = check_dirent_inode_dirent(trans, iter, d, target); if (ret) goto err; @@ -2128,7 +2127,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * u64 target_inum = le64_to_cpu(s.v->inode); u32 target_snapshot = le32_to_cpu(s.v->snapshot); - ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); + ret = lookup_inode(trans, target_inum, target_snapshot, &subvol_root); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; @@ -2144,13 +2143,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * target_inum, subvol_root.bi_parent_subvol, parent_subvol)) { subvol_root.bi_parent_subvol = parent_subvol; - ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); + subvol_root.bi_snapshot = le32_to_cpu(s.v->snapshot); + ret = __bch2_fsck_write_inode(trans, &subvol_root); if (ret) goto err; } - ret = check_dirent_target(trans, iter, d, &subvol_root, - target_snapshot); + ret = check_dirent_target(trans, iter, d, &subvol_root); if (ret) goto err; out: @@ -2243,8 +2242,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, } darray_for_each(target->inodes, i) { - ret = check_dirent_target(trans, iter, d, - &i->inode, i->snapshot); + ret = check_dirent_target(trans, iter, d, &i->inode); if (ret) goto err; } @@ -2385,7 +2383,7 @@ static int check_root_trans(struct btree_trans *trans) goto err; } - ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); + ret = lookup_inode(trans, BCACHEFS_ROOT_INO, snapshot, &root_inode); if (ret && !bch2_err_matches(ret, ENOENT)) return ret; @@ -2398,8 +2396,9 @@ static int check_root_trans(struct btree_trans *trans) bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); root_inode.bi_inum = inum; + root_inode.bi_snapshot = snapshot; - ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); + ret = __bch2_fsck_write_inode(trans, &root_inode); bch_err_msg(c, ret, "writing root inode"); } err: @@ -2566,7 +2565,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf))) - ret = reattach_inode(trans, &inode, snapshot); + ret = reattach_inode(trans, &inode); goto out; } @@ -2612,7 +2611,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino if (ret) break; - ret = reattach_inode(trans, &inode, snapshot); + ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); } break; @@ -2842,7 +2841,7 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], bch2_inode_nlink_get(&u), link->count)) { bch2_inode_nlink_set(&u, link->count); - ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); } fsck_err: return ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 753c208896c3be..19aa31c6812f40 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -387,9 +387,7 @@ int bch2_inode_write_flags(struct btree_trans *trans, return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags); } -int __bch2_fsck_write_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 snapshot) +int __bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { struct bkey_inode_buf *inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); @@ -398,19 +396,17 @@ int __bch2_fsck_write_inode(struct btree_trans *trans, return PTR_ERR(inode_p); bch2_inode_pack(inode_p, inode); - inode_p->inode.k.p.snapshot = snapshot; + inode_p->inode.k.p.snapshot = inode->bi_snapshot; return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, &inode_p->inode.k_i, BTREE_UPDATE_internal_snapshot_node); } -int bch2_fsck_write_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 snapshot) +int bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_fsck_write_inode(trans, inode, snapshot)); + __bch2_fsck_write_inode(trans, inode)); bch_err_fn(trans->c, ret); return ret; } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 695abd707cb6a5..f597d10a252d13 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -112,8 +112,8 @@ static inline int bch2_inode_write(struct btree_trans *trans, return bch2_inode_write_flags(trans, iter, inode, 0); } -int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32); -int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32); +int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); +int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); void bch2_inode_init_early(struct bch_fs *, struct bch_inode_unpacked *); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 6845dde1b3397a..44210a86c3674d 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -102,7 +102,8 @@ static int check_subvol(struct btree_trans *trans, inode.bi_inum, inode.bi_snapshot, inode.bi_subvol, subvol.k->p.offset)) { inode.bi_subvol = subvol.k->p.offset; - ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot)); + inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); + ret = __bch2_fsck_write_inode(trans, &inode); if (ret) goto err; } From 1f73cb4d34e787b3671f1e9d527eb8cf72c05283 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 24 Sep 2024 05:33:07 -0400 Subject: [PATCH 32/33] bcachefs: Add warn param to subvol_get_snapshot, peek_inode These shouldn't always be fatal errors - logged op resume, in particular, and we want it as a parameter there. Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.c | 32 +++++++++++--------------------- fs/bcachefs/inode.h | 24 ++++++++++++++++++++---- fs/bcachefs/subvolume.c | 13 ++++++++++--- fs/bcachefs/subvolume.h | 2 ++ 4 files changed, 43 insertions(+), 28 deletions(-) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 19aa31c6812f40..74d7a42ba1a215 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -327,22 +327,20 @@ int bch2_inode_unpack(struct bkey_s_c k, : bch2_inode_unpack_slowpath(k, unpacked); } -int bch2_inode_peek_nowarn(struct btree_trans *trans, - struct btree_iter *iter, - struct bch_inode_unpacked *inode, - subvol_inum inum, unsigned flags) +int __bch2_inode_peek(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, + subvol_inum inum, unsigned flags, + bool warn) { - struct bkey_s_c k; u32 snapshot; - int ret; - - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + int ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn); if (ret) return ret; - k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes, - SPOS(0, inum.inum, snapshot), - flags|BTREE_ITER_cached); + struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes, + SPOS(0, inum.inum, snapshot), + flags|BTREE_ITER_cached); ret = bkey_err(k); if (ret) return ret; @@ -357,20 +355,12 @@ int bch2_inode_peek_nowarn(struct btree_trans *trans, return 0; err: + if (warn) + bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum); bch2_trans_iter_exit(trans, iter); return ret; } -int bch2_inode_peek(struct btree_trans *trans, - struct btree_iter *iter, - struct bch_inode_unpacked *inode, - subvol_inum inum, unsigned flags) -{ - int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags); - bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum); - return ret; -} - int bch2_inode_write_flags(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *inode, diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index f597d10a252d13..9c1f6770568421 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -97,10 +97,26 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *); void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *); -int bch2_inode_peek_nowarn(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *, subvol_inum, unsigned); -int bch2_inode_peek(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *, subvol_inum, unsigned); +int __bch2_inode_peek(struct btree_trans *, struct btree_iter *, + struct bch_inode_unpacked *, subvol_inum, unsigned, bool); + +static inline int bch2_inode_peek_nowarn(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, + subvol_inum inum, unsigned flags) +{ + return __bch2_inode_peek(trans, iter, inode, inum, flags, false); +} + +static inline int bch2_inode_peek(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, + subvol_inum inum, unsigned flags) +{ + return __bch2_inode_peek(trans, iter, inode, inum, flags, true); + int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags); + return ret; +} int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *, enum btree_iter_update_trigger_flags); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 44210a86c3674d..91d8187ee168af 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -332,8 +332,8 @@ int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); } -int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, - u32 *snapid) +int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, + u32 *snapid, bool warn) { struct btree_iter iter; struct bkey_s_c_subvolume subvol; @@ -344,7 +344,8 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, BTREE_ITER_cached|BTREE_ITER_with_updates, subvolume); ret = bkey_err(subvol); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + + bch2_fs_inconsistent_on(warn && bch2_err_matches(ret, ENOENT), trans->c, "missing subvolume %u", subvolid); if (likely(!ret)) @@ -353,6 +354,12 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, return ret; } +int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, + u32 *snapid) +{ + return __bch2_subvolume_get_snapshot(trans, subvolid, snapid, true); +} + static int bch2_subvolume_reparent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index e62f876541fe25..f897d106e14260 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -26,6 +26,8 @@ int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, int bch2_subvol_has_children(struct btree_trans *, u32); int bch2_subvolume_get(struct btree_trans *, unsigned, bool, int, struct bch_subvolume *); +int __bch2_subvolume_get_snapshot(struct btree_trans *, u32, + u32 *, bool); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); int bch2_subvol_is_ro_trans(struct btree_trans *, u32); From 0f25eb4b60771f08fbcca878a8f7f88086d0c885 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:06:58 -0400 Subject: [PATCH 33/33] bcachefs: Rework logged op error handling Initially it was thought that we just wanted to ignore errors from logged op replay, but it turns out we do need to catch -EROFS, or we'll go into an infinite loop. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_misc.c | 63 +++++++++++++++++++++++++++------------- fs/bcachefs/logged_ops.c | 16 +++++----- fs/bcachefs/logged_ops.h | 2 +- 3 files changed, 53 insertions(+), 28 deletions(-) diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 177ed331c00b1d..307ed0a4518449 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -224,13 +224,14 @@ void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, str static int truncate_set_isize(struct btree_trans *trans, subvol_inum inum, - u64 new_i_size) + u64 new_i_size, + bool warn) { struct btree_iter iter = { NULL }; struct bch_inode_unpacked inode_u; int ret; - ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent) ?: + ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn) ?: (inode_u.bi_size = new_i_size, 0) ?: bch2_inode_write(trans, &iter, &inode_u); @@ -247,10 +248,11 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; u64 new_i_size = le64_to_cpu(op->v.new_i_size); + bool warn_errors = i_sectors_delta != NULL; int ret; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - truncate_set_isize(trans, inum, new_i_size)); + truncate_set_isize(trans, inum, new_i_size, i_sectors_delta != NULL)); if (ret) goto err; @@ -263,8 +265,8 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; err: - bch2_logged_op_finish(trans, op_k); - bch_err_fn(c, ret); + if (warn_errors) + bch_err_fn(c, ret); return ret; } @@ -288,9 +290,14 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); - int ret = bch2_trans_run(c, - bch2_logged_op_start(trans, &op.k_i) ?: - __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta)); + struct btree_trans *trans = bch2_trans_get(c); + int ret = bch2_logged_op_start(trans, &op.k_i); + if (ret) + goto out; + ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta); + ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; +out: + bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; @@ -308,7 +315,8 @@ void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, stru prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset)); } -static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len) +static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, + u64 offset, s64 len, bool warn) { struct btree_iter iter; struct bch_inode_unpacked inode_u; @@ -317,7 +325,7 @@ static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset offset <<= 9; len <<= 9; - ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent); + ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn); if (ret) return ret; @@ -357,12 +365,22 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, u64 len = abs(shift); u64 pos = le64_to_cpu(op->v.pos); bool insert = shift > 0; + u32 snapshot; + bool warn_errors = i_sectors_delta != NULL; int ret = 0; ret = bch2_inum_opts_get(trans, inum, &opts); if (ret) return ret; + /* + * check for missing subvolume before fpunch, as in resume we don't want + * it to be a fatal error + */ + ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors); + if (ret) + return ret; + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, 0), BTREE_ITER_intent); @@ -373,7 +391,7 @@ case LOGGED_OP_FINSERT_start: if (insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - adjust_i_size(trans, inum, src_offset, len) ?: + adjust_i_size(trans, inum, src_offset, len, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); if (ret) goto err; @@ -396,11 +414,11 @@ case LOGGED_OP_FINSERT_shift_extents: struct bkey_i delete, *copy; struct bkey_s_c k; struct bpos src_pos = POS(inum.inum, src_offset); - u32 snapshot; bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, + warn_errors); if (ret) goto btree_err; @@ -463,12 +481,12 @@ case LOGGED_OP_FINSERT_shift_extents: if (!insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - adjust_i_size(trans, inum, src_offset, shift) ?: + adjust_i_size(trans, inum, src_offset, shift, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } else { /* We need an inode update to update bi_journal_seq for fsync: */ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - adjust_i_size(trans, inum, 0, 0) ?: + adjust_i_size(trans, inum, 0, 0, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } @@ -477,9 +495,9 @@ case LOGGED_OP_FINSERT_finish: break; } err: - bch_err_fn(c, ret); - bch2_logged_op_finish(trans, op_k); bch2_trans_iter_exit(trans, &iter); + if (warn_errors) + bch_err_fn(c, ret); return ret; } @@ -508,9 +526,14 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); - int ret = bch2_trans_run(c, - bch2_logged_op_start(trans, &op.k_i) ?: - __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta)); + struct btree_trans *trans = bch2_trans_get(c); + int ret = bch2_logged_op_start(trans, &op.k_i); + if (ret) + goto out; + ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta); + ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; +out: + bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 6f4a4e1083c955..60e00702d1a46e 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -34,8 +34,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { struct bch_fs *c = trans->c; - const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type); - struct bkey_buf sk; u32 restart_count = trans->restart_count; struct printbuf buf = PRINTBUF; int ret = 0; @@ -46,13 +44,15 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (!fn) - return 0; - + struct bkey_buf sk; bch2_bkey_buf_init(&sk); bch2_bkey_buf_reassemble(&sk, c, k); - fn->resume(trans, sk.k); + const struct bch_logged_op_fn *fn = logged_op_fn(sk.k->k.type); + if (fn) + fn->resume(trans, sk.k); + + ret = bch2_logged_op_finish(trans, sk.k); bch2_bkey_buf_exit(&sk, c); fsck_err: @@ -93,7 +93,7 @@ int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) __bch2_logged_op_start(trans, k)); } -void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) +int bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) { int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0)); @@ -113,4 +113,6 @@ void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) buf.buf, bch2_err_str(ret)); printbuf_exit(&buf); } + + return ret; } diff --git a/fs/bcachefs/logged_ops.h b/fs/bcachefs/logged_ops.h index 4d1e786a27a89f..30ae9ef737dd95 100644 --- a/fs/bcachefs/logged_ops.h +++ b/fs/bcachefs/logged_ops.h @@ -15,6 +15,6 @@ static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i int bch2_resume_logged_ops(struct bch_fs *); int bch2_logged_op_start(struct btree_trans *, struct bkey_i *); -void bch2_logged_op_finish(struct btree_trans *, struct bkey_i *); +int bch2_logged_op_finish(struct btree_trans *, struct bkey_i *); #endif /* _BCACHEFS_LOGGED_OPS_H */